Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: inducer/pyopencl
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 01be96d7c515f33c6198e9790d8443711a104395
Choose a base ref
..
head repository: inducer/pyopencl
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 81f06d65e9aa40db827bc595f6547aa4c96d684d
Choose a head ref
Showing with 166 additions and 46 deletions.
  1. +2 −0 doc/runtime_memory.rst
  2. +35 −13 pyopencl/__init__.py
  3. +2 −2 pyopencl/array.py
  4. +19 −0 pyopencl/tools.py
  5. +107 −31 src/wrap_cl.hpp
  6. +1 −0 src/wrap_cl_part_2.cpp
2 changes: 2 additions & 0 deletions doc/runtime_memory.rst
Original file line number Diff line number Diff line change
@@ -281,6 +281,8 @@ Transfers

.. autofunction:: enqueue_copy(queue, dest, src, **kwargs)

.. autofunction:: enqueue_fill(queue, dest, pattern, size, *, offset=0, wait_for=None)

Mapping Memory into Host Address Space
--------------------------------------

48 changes: 35 additions & 13 deletions pyopencl/__init__.py
Original file line number Diff line number Diff line change
@@ -21,6 +21,8 @@
"""

from sys import intern
from warnings import warn
from typing import Union, Any

from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa

@@ -43,7 +45,6 @@
import os
from os.path import dirname, join, realpath
if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)):
from warnings import warn
warn("It looks like you are importing PyOpenCL from "
"its source directory. This likely won't work.")
raise
@@ -267,7 +268,6 @@ class CommandQueueUsedAfterExit(UserWarning):

def compiler_output(text):
import os
from warnings import warn
if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")):
warn(text, CompilerWarning)
else:
@@ -389,7 +389,6 @@ def enable_debugging(platform_or_context):
import os
os.environ["CPU_MAX_COMPUTE_UNITS"] = "1"
else:
from warnings import warn
warn("do not know how to enable debugging on '%s'"
% platform.name)

@@ -428,7 +427,6 @@ def _get_prg(self):
return self._prg
else:
# "no program" can only happen in from-source case.
from warnings import warn
warn("Pre-build attribute access defeats compiler caching.",
stacklevel=3)

@@ -662,7 +660,6 @@ def device_hashable_model_and_version_identifier(self):
return ("v1", self.vendor, self.vendor_id, self.name, self.version)

def device_persistent_unique_id(self):
from warnings import warn
warn("Device.persistent_unique_id is deprecated. "
"Use Device.hashable_model_and_version_identifier instead.",
DeprecationWarning, stacklevel=2)
@@ -684,7 +681,6 @@ def device_persistent_unique_id(self):

def context_init(self, devices, properties, dev_type, cache_dir=None):
if cache_dir is not None:
from warnings import warn
warn("The 'cache_dir' argument to the Context constructor "
"is deprecated and no longer has an effect. "
"It was removed because it only applied to the wrapper "
@@ -970,7 +966,6 @@ def image_init(self, context, flags, format, shape=None, pitches=None,

if hostbuf is not None and not \
(flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)):
from warnings import warn
warn("'hostbuf' was passed, but no memory flags to make use of it.")

if hostbuf is None and pitches is not None:
@@ -1043,7 +1038,6 @@ def image_init(self, context, flags, format, shape=None, pitches=None,

class _ImageInfoGetter:
def __init__(self, event):
from warnings import warn
warn("Image.image.attr is deprecated and will go away in 2021. "
"Use Image.attr directly, instead.")

@@ -1152,12 +1146,13 @@ def memory_map_exit(self, exc_type, exc_val, exc_tb):
if get_cl_header_version() >= (2, 0):
svmallocation_old_init = SVMAllocation.__init__

def svmallocation_init(self, ctx, size, alignment, flags, _interface=None):
def svmallocation_init(self, ctx, size, alignment, flags, _interface=None,
queue=None):
"""
:arg ctx: a :class:`Context`
:arg flags: some of :class:`svm_mem_flags`.
"""
svmallocation_old_init(self, ctx, size, alignment, flags)
svmallocation_old_init(self, ctx, size, alignment, flags, queue)

# mem_flags.READ_ONLY applies to kernels, not the host
read_write = True
@@ -1784,6 +1779,9 @@ def enqueue_copy(queue, dest, src, **kwargs):
src = SVM(src)

is_blocking = kwargs.pop("is_blocking", True)
assert kwargs.pop("src_offset", 0) == 0
assert kwargs.pop("dest_offset", 0) == 0
assert "byte_count" not in kwargs or kwargs.pop("byte_count") == src._size()
return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs)

else:
@@ -1822,6 +1820,26 @@ def enqueue_copy(queue, dest, src, **kwargs):
# }}}


# {{{ enqueue_fill

def enqueue_fill(queue: CommandQueue, dest: Union[MemoryObjectHolder, SVM],
        pattern: Any, size: int, *, offset: int = 0, wait_for=None) -> Event:
    """Fill *dest* with *pattern*, dispatching on the type of *dest*.

    :arg queue: the :class:`CommandQueue` the fill is enqueued on.
    :arg dest: a :class:`MemoryObjectHolder` (handled by
        :func:`enqueue_fill_buffer`) or an :class:`SVM` (handled by
        :func:`enqueue_svm_memfill`).
    :arg pattern: the fill pattern, forwarded unchanged to the underlying
        fill function.
    :arg size: the amount to fill, forwarded unchanged to the underlying
        fill function.
    :arg offset: starting offset within *dest*. Only supported when *dest*
        is a :class:`MemoryObjectHolder`.
    :arg wait_for: forwarded unchanged to the underlying fill function.
    :returns: the result of the underlying fill function.
    :raises NotImplementedError: if *dest* is an :class:`SVM` and *offset*
        is nonzero.
    :raises TypeError: if *dest* is neither of the supported types.

    .. versionadded:: 2022.2
    """
    # Buffer-like destinations are checked first, as in the type dispatch
    # order callers rely on.
    if isinstance(dest, MemoryObjectHolder):
        return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for)

    if not isinstance(dest, SVM):
        raise TypeError(f"enqueue_fill does not know how to fill '{type(dest)}'")

    # The SVM fill path takes no offset argument, so refuse rather than
    # silently fill from the start.
    if offset:
        raise NotImplementedError("enqueue_fill with SVM does not yet support "
                "offsets")

    return enqueue_svm_memfill(queue, dest, pattern, size, wait_for)

# }}}


# {{{ image creation

DTYPE_TO_CHANNEL_TYPE = {
@@ -1927,7 +1945,6 @@ def enqueue_barrier(queue, wait_for=None):

def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None):
if not (queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2)):
from warnings import warn
warn("The context for this queue does not declare OpenCL 1.2 support, so "
"the next thing you might see is a crash")

@@ -1983,7 +2000,7 @@ def enqueue_svm_migratemem(queue, svms, flags, wait_for=None):
wait_for)


def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None):
def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None):
"""Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype*
and *order*. (See :func:`numpy.empty` for the meaning of these arguments.)
The array will be allocated in shared virtual memory belonging
@@ -2001,6 +2018,10 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None):
will likely want to wrap the returned array in an :class:`SVM` tag.
.. versionadded:: 2016.2
.. versionchanged:: 2022.2
*queue* argument added.
"""

dtype = np.dtype(dtype)
@@ -2047,7 +2068,8 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None):
if alignment is None:
alignment = itemsize

svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface)
svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface,
queue=queue)
return np.asarray(svm_alloc)


4 changes: 2 additions & 2 deletions pyopencl/array.py
Original file line number Diff line number Diff line change
@@ -1468,8 +1468,8 @@ def _zero_fill(self, queue=None, wait_for=None):
# https://github.com/inducer/pyopencl/issues/395
if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
self.add_event(
cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0),
self.offset, self.nbytes, wait_for=wait_for))
cl.enqueue_fill(queue, self.base_data, np.int8(0),
self.nbytes, offset=self.offset, wait_for=wait_for))
else:
zero = np.zeros((), self.dtype)
self.fill(zero, queue=queue)
19 changes: 19 additions & 0 deletions pyopencl/tools.py
Original file line number Diff line number Diff line change
@@ -67,6 +67,25 @@ def _register_types():
# }}}


# {{{ svm allocator

# FIXME: Replace me with C++
class SVMAllocator:
    """Callable that hands out :class:`pyopencl.SVM`-wrapped allocations.

    The constructor only records its arguments; each call allocates a
    one-dimensional :class:`numpy.int8` array of the requested length via
    :func:`pyopencl.svm_empty` and wraps it in :class:`pyopencl.SVM`.

    :arg ctx: passed to :func:`pyopencl.svm_empty` as its context argument.
    :arg flags: passed to :func:`pyopencl.svm_empty` as its flags argument.
    :arg alignment: passed to :func:`pyopencl.svm_empty` as *alignment*.
    :arg queue: passed to :func:`pyopencl.svm_empty` as *queue*.
    """

    def __init__(self, ctx, flags, *, alignment=0, queue=None):
        self._context, self._flags = ctx, flags
        self._alignment, self._queue = alignment, queue

    def __call__(self, nbytes):
        """Return a new :class:`pyopencl.SVM` wrapping *nbytes* ``int8`` entries."""
        # Imported here rather than at module level, matching the original
        # code's function-scope import of pyopencl.
        import pyopencl as cl

        backing = cl.svm_empty(
                self._context, self._flags, (nbytes,), np.int8,
                order="C", alignment=self._alignment, queue=self._queue)
        return cl.SVM(backing)

# }}}


# {{{ first-arg caches

_first_arg_dependent_caches = []
Loading