Use MirroredArray instead of numpy arrays to reduce boilerplate #671
@@ -0,0 +1,171 @@
import enum

import numpy as np
import pyopencl
from petsc4py import PETSc

from pyop2.configuration import configuration
from pyop2.offload_utils import _offloading as offloading
from pyop2.types.access import READ, RW, WRITE, INC
from pyop2.utils import verify_reshape


class Status(enum.Enum):
    """where is the up-to-date version?"""
    ON_HOST = enum.auto()
    ON_DEVICE = enum.auto()
    ON_BOTH = enum.auto()


ON_HOST = Status.ON_HOST
ON_DEVICE = Status.ON_DEVICE
ON_BOTH = Status.ON_BOTH


class MirroredArray:
    """An array that is available on both host and device, copying values where necessary."""

    def __init__(self, data, dtype, shape):
        # TODO This could be allocated lazily
        self.dtype = np.dtype(dtype)
        if data is None:
            self._host_data = np.zeros(shape, dtype=dtype)
        else:
            self._host_data = verify_reshape(data, dtype, shape)
        self.availability = ON_BOTH

    @classmethod
    def new(cls, *args, **kwargs):
        if configuration["backend"] == "CPU_ONLY":
            return CPUOnlyArray(*args, **kwargs)
        elif configuration["backend"] == "OPENCL":
            return OpenCLArray(*args, **kwargs)
        else:
            raise ValueError

    @property
    def kernel_arg(self):
        # lazy but for now assume that the data is always modified if we access
        # the pointer
Comment on lines +48 to +49:
- I agree we should do this right now. But probably we could patch
- the parloop does tell you the access type? otherwise nothing would work
- A simple solution here would be to replace
        return self.device_ptr if offloading else self.host_ptr

        # if offloading:
        #     # N.B. not considering MAX, MIN here
        #     if access in {RW, WRITE, INC}:
        #         return self.device_ptr
        #     else:
        #         return self.device_ptr_ro
        # else:
        #     # N.B. not considering MAX, MIN here
        #     if access in {RW, WRITE, INC}:
        #         return self.host_ptr
        #     else:
        #         return self.host_ptr_ro

    @property
    def is_available_on_device(self):
        return self.availability in {ON_DEVICE, ON_BOTH}

    def ensure_availability_on_device(self):
        if not self.is_available_on_device:
            self.host_to_device_copy()

    @property
    def is_available_on_host(self):
        return self.availability in {ON_HOST, ON_BOTH}

    def ensure_availability_on_host(self):
        if not self.is_available_on_host:
            self.device_to_host_copy()

    @property
    def vec(self):
Comment on the vec property:
- Subclasses would need to implement the right thing here. I have not tried to make my code work for
        raise NotImplementedError
        # if offload == "cpu":
        #     return PETSc.Vec().createMPI(...)
        # elif offload == "gpu":
        #     if backend == "cuda":
        #         return PETSc.Vec().createCUDA(..)
        #     ...

    @property
    def data(self):
        self.ensure_availability_on_host()
        self.availability = ON_HOST
        v = self._host_data.view()
        v.setflags(write=True)
        return v
Comment on lines +94 to +98:
- This would force a device->host copy, which isn't necessary and would be performance limiting.
- I think a device -> host copy is needed here as otherwise the numpy array you get back might be wrong?
- IMO we should return the array on the device (cl.array.Array, pycuda.GPUArray) whenever we are in the offloading context. They allow us to perform numpy-like operations on the device, so IMO there is no good reason for returning the numpy array. Wdyt?
- That makes a lot of sense. I was assuming here that every time we called this function we would want to inspect the data, which would require a copy to the host.
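One reading of the last two comments: inside the offloading context, data could hand back the backend's device array instead of forcing a transfer to the host. A rough sketch of that idea for the OpenCL subclass (assuming pyopencl.array.Array semantics; this is not part of the PR as written):

    @property
    def data(self):
        if offloading:
            # Stay on the device: pyopencl.array.Array supports most
            # numpy-style operations, so no host transfer is needed.
            self.ensure_availability_on_device()
            self.availability = ON_DEVICE
            return self._device_data
        else:
            self.ensure_availability_on_host()
            self.availability = ON_HOST
            v = self._host_data.view()
            v.setflags(write=True)
            return v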
    @property
    def data_ro(self):
        self.ensure_availability_on_host()
        v = self._host_data.view()
        v.setflags(write=False)
        return v

    @property
    def host_ptr(self):
        self.ensure_availability_on_host()
        self.availability = ON_HOST
        return self.data.ctypes.data

    @property
    def host_ptr_ro(self):
        self.ensure_availability_on_host()
        return self.data.ctypes.data


class CPUOnlyArray(MirroredArray):

    @property
    def device_ptr(self):
        return self.host_ptr

    @property
    def device_ptr_ro(self):
        return self.host_ptr_ro

    def host_to_device_copy(self):
        pass

    def device_to_host_copy(self):
        pass


if configuration["backend"] == "OPENCL":
    # TODO: Instruct the user to pass
    #   -viennacl_backend opencl
    #   -viennacl_opencl_device_type gpu
    # create a dummy vector and extract its associated command queue
    x = PETSc.Vec().create(PETSc.COMM_WORLD)
    x.setType("viennacl")
    x.setSizes(size=1)
    queue_ptr = x.getCLQueueHandle()
    cl_queue = pyopencl.CommandQueue.from_int_ptr(queue_ptr, retain=False)
Comment on lines +136 to +145:
- This was just a hack to create a global queue object since I think a

Comment on lines +136 to +145:
- I'm assuming this would go away.
- Possibly. What I'm trying to illustrate here is that we can do without the
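The thread above suggests the ViennaCL detour exists mainly to obtain a global command queue. A minimal alternative sketch, assuming it is acceptable for pyopencl to build its own context and queue rather than sharing the one owned by PETSc:

# Purely illustrative: let pyopencl choose a device and create the
# context/queue itself instead of extracting them from a dummy Vec.
cl_ctx = pyopencl.create_some_context(interactive=False)
cl_queue = pyopencl.CommandQueue(cl_ctx)

Whether this is viable depends on whether PETSc and PyOP2 need to enqueue work on the same queue, which the original hack guarantees.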
class OpenCLArray(MirroredArray):

    def __init__(self, data, shape, dtype):
        if data is None:
            self._device_data = pyopencl.array.empty(cl_queue, shape, dtype)
        else:
            self._device_data = pyopencl.array.to_device(cl_queue, data)
Comment on lines +151 to +154:
- [Minor]: This is probably missing a
- Yep. Good spot.
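The comment is cut off, but it most plausibly points at the missing call up to the base constructor, without which the host-side state (dtype, _host_data, availability) is never initialised. A sketch of that fix, assuming the base argument order of data, dtype, shape:

    # Illustrative fix following the review comment: chain to MirroredArray.__init__
    # so the host buffer and availability flag are set up as well.
    def __init__(self, data, shape, dtype):
        super().__init__(data, dtype, shape)
        if data is None:
            self._device_data = pyopencl.array.empty(cl_queue, shape, dtype)
        else:
            self._device_data = pyopencl.array.to_device(cl_queue, data)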
    @property
    def device_ptr(self):
        self.ensure_availability_on_device()
        self.availability = ON_DEVICE
        return self._device_data.data

    @property
    def device_ptr_ro(self):
        self.ensure_availability_on_device()
        return self._device_data.data

    def host_to_device_copy(self):
        self._device_data.set(self._host_data)

    def device_to_host_copy(self):
        self._device_data.get(self._host_data)
Comment on lines +167 to +171:
- The logic here would be different based on whether this is a Dat or (Map|Global). For
- I suppose. Perhaps a
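Both replies are truncated, but one way to read the suggestion is that the copy policy could be specialised per container type rather than hard-wired into one class. A purely illustrative sketch (the mixin name and the assumption that Map tables are never written on the device are mine, not part of the PR):

class ReadOnlyOnDeviceMixin:
    """Hypothetical mixin for arrays, such as Map tables, that are only ever
    read on the device, so the device -> host copy can be skipped."""

    def device_to_host_copy(self):
        pass  # the host copy is always authoritative


class OpenCLMapArray(ReadOnlyOnDeviceMixin, OpenCLArray):
    pass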
@@ -0,0 +1,43 @@
class _not_implemented:  # noqa
    """Not Implemented"""


class AbstractComputeBackend:
    """
    Abstract class to record all the backend specific implementation of
    :mod:`pyop2`'s data structures.
    """
    GlobalKernel = _not_implemented
    Parloop = _not_implemented
    Set = _not_implemented
    ExtrudedSet = _not_implemented
    MixedSet = _not_implemented
    Subset = _not_implemented
    DataSet = _not_implemented
    MixedDataSet = _not_implemented
    Map = _not_implemented
    MixedMap = _not_implemented
    Dat = _not_implemented
    MixedDat = _not_implemented
    DatView = _not_implemented
    Mat = _not_implemented
    Global = _not_implemented
    GlobalDataSet = _not_implemented
    PETScVecType = _not_implemented

    def __getattribute__(self, key):
        val = super().__getattribute__(key)
        if val is _not_implemented:
            raise NotImplementedError(f"'{val}' is not implemented for backend"
                                      f" '{type(self).__name__}'.")
        return val

    def turn_on_offloading(self):
        raise NotImplementedError()

    def turn_off_offloading(self):
        raise NotImplementedError()

    @property
    def cache_key(self):
        raise NotImplementedError()
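For illustration, a concrete backend would subclass this and fill in only what it supports; anything left as _not_implemented raises as soon as it is looked up. The CPUBackend name and the attributes chosen below are hypothetical, not part of this diff:

# Hypothetical concrete backend to show how the guard in __getattribute__ behaves.
class CPUBackend(AbstractComputeBackend):
    PETScVecType = "standard"

    def turn_on_offloading(self):
        pass

    def turn_off_offloading(self):
        pass


backend = CPUBackend()
backend.PETScVecType  # returns "standard"
backend.Dat           # raises NotImplementedError for backend 'CPUBackend'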
Comment on MirroredArray.__init__ (self.availability = ON_BOTH):
- Why is the availability ON_BOTH here?
- This is probably wrong if data is not None. For simplicity I was assuming that the array would be initialised with a valid copy on both host and device. This definitely doesn't need to be the case (and I doubt I have implemented it correctly anyway).
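One possible resolution, sketched only to illustrate the point the thread is making (the exact policy is still open): treat freshly supplied or zero-initialised data as living on the host only, and let the first device access trigger the copy via ensure_availability_on_device().

    # Illustrative variant of MirroredArray.__init__ with host-only initial state;
    # the device copy is made lazily on first device access.
    def __init__(self, data, dtype, shape):
        self.dtype = np.dtype(dtype)
        if data is None:
            self._host_data = np.zeros(shape, dtype=dtype)
        else:
            self._host_data = verify_reshape(data, dtype, shape)
        self.availability = ON_HOST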