diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 09e4d0566..831d445ce 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -33,7 +33,7 @@ import numpy as np from pytools import memoize, memoize_method -from pyopencl._cl import bitlog2 # noqa: F401 +from pyopencl._cl import bitlog2, get_cl_header_version # noqa: F401 from pytools.persistent_dict import KeyBuilder as KeyBuilderBase import re @@ -59,10 +59,19 @@ def _register_types(): # {{{ imported names from pyopencl._cl import ( # noqa - PooledBuffer as PooledBuffer, + _tools_PooledBuffer as PooledBuffer, _tools_DeferredAllocator as DeferredAllocator, _tools_ImmediateAllocator as ImmediateAllocator, - MemoryPool as MemoryPool) + _tools_MemoryPool as MemoryPool, + ) + + +if get_cl_header_version() >= (2, 0): + from pyopencl._cl import ( # noqa + _tools_SVMemoryPool as SVMemoryPool, + _tools_PooledSVM as PooledSVM, + _tools_SVMAllocator as SVMAllocator, + ) # }}} diff --git a/src/mempool.hpp b/src/mempool.hpp index 44f0fd643..812bd2f1c 100644 --- a/src/mempool.hpp +++ b/src/mempool.hpp @@ -233,7 +233,8 @@ namespace PYGPU_PACKAGE std::cout << "[pool] allocation of size " << size << " served from bin " << bin_nr << " which contained " << bin.size() << " entries" << std::endl; - return pop_block_from_bin(bin, size); + return m_allocator->hand_out_existing_block( + pop_block_from_bin(bin, size)); } size_type alloc_sz = alloc_size(bin_nr); @@ -256,7 +257,8 @@ namespace PYGPU_PACKAGE m_allocator->try_release_blocks(); if (bin.size()) - return pop_block_from_bin(bin, size); + return m_allocator->hand_out_existing_block( + pop_block_from_bin(bin, size)); if (m_trace) std::cout << "[pool] allocation still OOM after GC" << std::endl; @@ -282,7 +284,7 @@ namespace PYGPU_PACKAGE "failed to free memory for allocation"); } - void free(pointer_type p, size_type size) + void free(pointer_type &&p, size_type size) { --m_active_blocks; m_active_bytes -= size; @@ -291,7 +293,7 @@ namespace PYGPU_PACKAGE if 
(!m_stop_holding) { inc_held_blocks(); - get_bin(bin_nr).push_back(p); + get_bin(bin_nr).push_back(std::move(p)); if (m_trace) std::cout << "[pool] block of size " << size << " returned to bin " @@ -300,7 +302,7 @@ namespace PYGPU_PACKAGE } else { - m_allocator->free(p); + m_allocator->free(std::move(p)); m_managed_bytes -= alloc_size(bin_nr); } } @@ -313,7 +315,7 @@ namespace PYGPU_PACKAGE while (bin.size()) { - m_allocator->free(bin.back()); + m_allocator->free(std::move(bin.back())); m_managed_bytes -= alloc_size(bin_pair.first); bin.pop_back(); @@ -353,7 +355,7 @@ namespace PYGPU_PACKAGE if (bin.size()) { - m_allocator->free(bin.back()); + m_allocator->free(std::move(bin.back())); m_managed_bytes -= alloc_size(bin_pair.first); bin.pop_back(); @@ -379,7 +381,7 @@ namespace PYGPU_PACKAGE pointer_type pop_block_from_bin(bin_t &bin, size_type size) { - pointer_type result = bin.back(); + pointer_type result(std::move(bin.back())); bin.pop_back(); dec_held_blocks(); @@ -399,7 +401,7 @@ namespace PYGPU_PACKAGE typedef typename Pool::pointer_type pointer_type; typedef typename Pool::size_type size_type; - private: + protected: PYGPU_SHARED_PTR m_pool; pointer_type m_ptr; @@ -421,7 +423,7 @@ namespace PYGPU_PACKAGE { if (m_valid) { - m_pool->free(m_ptr, m_size); + m_pool->free(std::move(m_ptr), m_size); m_valid = false; } else @@ -435,16 +437,8 @@ namespace PYGPU_PACKAGE #endif ); } - - pointer_type ptr() const - { return m_ptr; } - - size_type size() const - { return m_size; } }; } - - #endif diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 76477f55f..ad60372db 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1708,7 +1708,13 @@ namespace pyopencl src.m_valid = false; } - command_queue_ref(const command_queue_ref &) = delete; + command_queue_ref(const command_queue_ref &) + { + throw error("command_queue_ref", CL_INVALID_VALUE, + "command_queue_ref copy constructor is never supposed to be called; " + "all notional invocations should be eliminated because of 
NRVO"); + } + command_queue_ref &operator=(const command_queue_ref &) = delete; ~command_queue_ref() @@ -3545,7 +3551,7 @@ namespace pyopencl class svm_pointer { public: - virtual void *ptr() const = 0; + virtual void *svm_ptr() const = 0; // may throw size_not_available virtual size_t size() const = 0; }; @@ -3578,7 +3584,7 @@ namespace pyopencl m_size = ward->m_buf.len; } - void *ptr() const + void *svm_ptr() const { return m_ptr; } @@ -3674,7 +3680,7 @@ namespace pyopencl m_allocation = nullptr; } - void *ptr() const + void *svm_ptr() const { return m_allocation; } @@ -3701,7 +3707,7 @@ namespace pyopencl void bind_to_queue(command_queue const &queue) { - if (is_queue_out_of_order(m_queue.data())) + if (is_queue_out_of_order(queue.data())) throw error("SVMAllocation.bind_to_queue", CL_INVALID_VALUE, "supplying an out-of-order queue to SVMAllocation is invalid"); @@ -3794,7 +3800,7 @@ namespace pyopencl ( cq.data(), is_blocking, - dst.ptr(), src.ptr(), + dst.svm_ptr(), src.svm_ptr(), size, PYOPENCL_WAITLIST_ARGS, &evt @@ -3856,7 +3862,7 @@ namespace pyopencl clEnqueueSVMMemFill, ( cq.data(), - dst.ptr(), pattern_ptr, + dst.svm_ptr(), pattern_ptr, pattern_len, size, PYOPENCL_WAITLIST_ARGS, @@ -3913,7 +3919,7 @@ namespace pyopencl cq.data(), is_blocking, flags, - svm.ptr(), size, + svm.svm_ptr(), size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3936,7 +3942,7 @@ namespace pyopencl clEnqueueSVMUnmap, ( cq.data(), - svm.ptr(), + svm.svm_ptr(), PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3964,7 +3970,7 @@ namespace pyopencl { svm_pointer &svm(py::cast(py_svm)); - svm_pointers.push_back(svm.ptr()); + svm_pointers.push_back(svm.svm_ptr()); sizes.push_back(svm.size()); } @@ -4760,7 +4766,7 @@ namespace pyopencl void set_arg_svm(cl_uint arg_index, svm_pointer const &wrp) { PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer, - (m_kernel, arg_index, wrp.ptr())); + (m_kernel, arg_index, wrp.svm_ptr())); } #endif diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 
453f34c38..3c40e1c90 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -298,7 +298,7 @@ void pyopencl_expose_part_2(py::module &m) { typedef svm_pointer cls; py::class_(m, "SVMPointer", py::dynamic_attr()) - .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.ptr(); }) + .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.svm_ptr(); }) .def("_size", [](cls &self) -> py::object { try @@ -336,7 +336,7 @@ void pyopencl_expose_part_2(py::module &m) "|std-enqueue-blurb|") .def(py::self == py::self) .def(py::self != py::self) - .def("__hash__", [](cls &self) { return (intptr_t) self.ptr(); }) + .def("__hash__", [](cls &self) { return (intptr_t) self.svm_ptr(); }) .DEF_SIMPLE_METHOD(bind_to_queue) .DEF_SIMPLE_METHOD(unbind_from_queue) ; diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 35630b036..429e3e1b9 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -57,12 +57,18 @@ namespace { return false; } + virtual pointer_type allocate(size_type s) { return nullptr; } - void free(pointer_type p) + virtual pointer_type hand_out_existing_block(pointer_type &&p) + { + return p; + } + + void free(pointer_type &&p) { } void try_release_blocks() @@ -70,16 +76,16 @@ namespace }; - // {{{ cl allocators + // {{{ buffer allocators - class cl_allocator_base + class buffer_allocator_base { protected: std::shared_ptr m_context; cl_mem_flags m_flags; public: - cl_allocator_base(std::shared_ptr const &ctx, + buffer_allocator_base(std::shared_ptr const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : m_context(ctx), m_flags(flags) { @@ -88,21 +94,26 @@ namespace "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); } - cl_allocator_base(cl_allocator_base const &src) + buffer_allocator_base(buffer_allocator_base const &src) : m_context(src.m_context), m_flags(src.m_flags) { } - virtual ~cl_allocator_base() + virtual ~buffer_allocator_base() { } typedef cl_mem pointer_type; typedef size_t size_type; - virtual cl_allocator_base *copy() 
const = 0; + virtual buffer_allocator_base *copy() const = 0; virtual bool is_deferred() const = 0; virtual pointer_type allocate(size_type s) = 0; - void free(pointer_type p) + virtual pointer_type hand_out_existing_block(pointer_type p) + { + return p; + } + + void free(pointer_type &&p) { PYOPENCL_CALL_GUARDED(clReleaseMemObject, (p)); } @@ -113,20 +124,21 @@ namespace } }; - class cl_deferred_allocator : public cl_allocator_base + + class deferred_buffer_allocator : public buffer_allocator_base { private: - typedef cl_allocator_base super; + typedef buffer_allocator_base super; public: - cl_deferred_allocator(std::shared_ptr const &ctx, + deferred_buffer_allocator(std::shared_ptr const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : super(ctx, flags) { } - cl_allocator_base *copy() const + buffer_allocator_base *copy() const { - return new cl_deferred_allocator(*this); + return new deferred_buffer_allocator(*this); } bool is_deferred() const @@ -143,26 +155,26 @@ namespace const unsigned zero = 0; - class cl_immediate_allocator : public cl_allocator_base + class immediate_buffer_allocator : public buffer_allocator_base { private: - typedef cl_allocator_base super; + typedef buffer_allocator_base super; pyopencl::command_queue m_queue; public: - cl_immediate_allocator(pyopencl::command_queue &queue, + immediate_buffer_allocator(pyopencl::command_queue &queue, cl_mem_flags flags=CL_MEM_READ_WRITE) : super(std::shared_ptr(queue.get_context()), flags), m_queue(queue.data(), /*retain*/ true) { } - cl_immediate_allocator(cl_immediate_allocator const &src) + immediate_buffer_allocator(immediate_buffer_allocator const &src) : super(src), m_queue(src.m_queue) { } - cl_allocator_base *copy() const + buffer_allocator_base *copy() const { - return new cl_immediate_allocator(*this); + return new immediate_buffer_allocator(*this); } bool is_deferred() const @@ -215,10 +227,10 @@ namespace // }}} - // {{{ allocator_call + // {{{ buffer_allocator_call inline - pyopencl::buffer 
*allocator_call(cl_allocator_base &alloc, size_t size) + pyopencl::buffer *buffer_allocator_call(buffer_allocator_base &alloc, size_t size) { cl_mem mem; int try_count = 0; @@ -266,12 +278,12 @@ namespace // {{{ pooled_buffer class pooled_buffer - : public pyopencl::pooled_allocation >, + : public pyopencl::pooled_allocation >, public pyopencl::memory_object_holder { private: typedef - pyopencl::pooled_allocation > + pyopencl::pooled_allocation > super; public: @@ -281,17 +293,22 @@ namespace { } const super::pointer_type data() const - { return ptr(); } + { return m_ptr; } + + size_t size() const + { + return m_size; + } }; // }}} - // {{{{ device_pool_allocate + // {{{ buffer_pool_allocate - pooled_buffer *device_pool_allocate( - std::shared_ptr > pool, - pyopencl::memory_pool::size_type sz) + pooled_buffer *buffer_pool_allocate( + std::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) { return new pooled_buffer(pool, sz); } @@ -299,25 +316,40 @@ namespace // }}} +#if PYOPENCL_CL_VERSION >= 0x2000 + // {{{ svm allocator // FIXME: Does this need deferred and immediate just like the buffer-level // allocators? (I.e. can I tell whether I am out of memory just from allocations?) 
+ struct svm_held_pointer + { + void *ptr; + pyopencl::command_queue_ref queue; + }; + + class svm_allocator { + public: + typedef svm_held_pointer pointer_type; + typedef size_t size_type; + protected: std::shared_ptr m_context; cl_uint m_alignment; - cl_mem_flags m_flags; + cl_svm_mem_flags m_flags; + pyopencl::command_queue_ref m_queue; + public: svm_allocator(std::shared_ptr const &ctx, - cl_uint alignment, cl_mem_flags flags=CL_MEM_READ_WRITE) + cl_uint alignment, cl_svm_mem_flags flags=CL_MEM_READ_WRITE, + pyopencl::command_queue *queue=nullptr) : m_context(ctx), m_alignment(alignment), m_flags(flags) { - if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) - throw pyopencl::error("Allocator", CL_INVALID_VALUE, - "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); + if (queue) + m_queue.set(queue->data()); /* bind the caller-supplied queue; m_queue is only default-constructed at this point */ } svm_allocator(svm_allocator const &src) @@ -328,21 +360,61 @@ namespace virtual ~svm_allocator() { } - typedef void *pointer_type; - typedef size_t size_type; + virtual svm_allocator *copy() const + { + return new svm_allocator(m_context, m_alignment, m_flags); + } + + virtual bool is_deferred() const + { + // FIXME: I don't know whether that's true. 
+ return false; + } pointer_type allocate(size_type size) { if (size == 0) - return nullptr; + return { nullptr, nullptr }; PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); - return clSVMAlloc(m_context->data(), m_flags, size, m_alignment); + return { + clSVMAlloc(m_context->data(), m_flags, size, m_alignment), + pyopencl::command_queue_ref(m_queue.data()) + }; + } + + virtual pointer_type hand_out_existing_block(pointer_type &&p) + { + if (m_queue.is_valid()) + { + if (p.queue.is_valid()) + { + // make sure synchronization promises stay valid in new queue + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, (p.queue.data(), &evt)); + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (m_queue.data(), 1, &evt)); + } + p.queue.set(m_queue.data()); + } + return std::move(p); /* p is an rvalue-reference parameter: a plain `return p` copy-constructs before C++20, and command_queue_ref's copy constructor throws */ } - void free(pointer_type p) + void free(pointer_type &&p) { - clSVMFree(m_context->data(), p); + if (p.queue.is_valid()) + { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, ( + p.queue.data(), 1, &p.ptr, + nullptr, nullptr, + 0, nullptr, nullptr)); + p.queue.reset(); + } + else + { + PYOPENCL_PRINT_CALL_TRACE("clSVMFree"); + clSVMFree(m_context->data(), p.ptr); + } } void try_release_blocks() @@ -354,7 +426,99 @@ namespace // }}} + // {{{ svm_allocator_call + + inline + svm_held_pointer svm_allocator_call(svm_allocator &alloc, size_t size) + { + svm_held_pointer mem; + int try_count = 0; + while (true) + { + try + { + return alloc.allocate(size); + } + catch (pyopencl::error &e) + { + if (!e.is_out_of_memory()) + throw; + if (++try_count == 2) + throw; + } + alloc.try_release_blocks(); + } + } + + // }}} + + + // {{{ pooled_svm + + class pooled_svm + : public pyopencl::pooled_allocation>, + public pyopencl::svm_pointer + { + private: + typedef + pyopencl::pooled_allocation> + super; + + public: + pooled_svm( + std::shared_ptr p, super::size_type s) + : super(p, s) + { } + + void *svm_ptr() const + { return m_ptr.ptr; } + + size_t size() const + { return m_size; } + + void bind_to_queue(pyopencl::command_queue 
const &queue) + { + if (pyopencl::is_queue_out_of_order(queue.data())) + throw pyopencl::error("PooledSVM.bind_to_queue", CL_INVALID_VALUE, + "supplying an out-of-order queue to SVMAllocation is invalid"); + + if (m_ptr.queue.is_valid()) + { + // make sure synchronization promises stay valid in new queue + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, (m_ptr.queue.data(), &evt)); + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (queue.data(), 1, &evt)); + } + + m_ptr.queue.set(queue.data()); + } + + void unbind_from_queue() + { + // NOTE: This absolves the allocation from any synchronization promises + // made. Keeping those before calling this method is the responsibility + // of the user. + m_ptr.queue.reset(); + } + }; + + // }}} + + + // {{{ svm_pool_allocate + + pooled_svm *svm_pool_allocate( + std::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) + { + return new pooled_svm(pool, sz); + } + + // }}} + +#endif template void expose_memory_pool(Wrapper &wrapper) @@ -381,11 +545,11 @@ void pyopencl_expose_mempool(py::module &m) m.def("bitlog2", pyopencl::bitlog2); { - typedef cl_allocator_base cls; + typedef buffer_allocator_base cls; py::class_ wrapper( m, "_tools_AllocatorBase"/*, py::no_init */); wrapper - .def("__call__", allocator_call) + .def("__call__", buffer_allocator_call) ; } @@ -410,8 +574,8 @@ void pyopencl_expose_mempool(py::module &m) } { - typedef cl_deferred_allocator cls; - py::class_ wrapper( + typedef deferred_buffer_allocator cls; + py::class_ wrapper( m, "_tools_DeferredAllocator"); wrapper .def(py::init< @@ -424,8 +588,8 @@ void pyopencl_expose_mempool(py::module &m) } { - typedef cl_immediate_allocator cls; - py::class_ wrapper( + typedef immediate_buffer_allocator cls; + py::class_ wrapper( m, "_tools_ImmediateAllocator"); wrapper .def(py::init()) @@ -435,18 +599,18 @@ void pyopencl_expose_mempool(py::module &m) } { - typedef pyopencl::memory_pool cls; + typedef pyopencl::memory_pool cls; py::class_< cls, /* 
boost::noncopyable, */ - std::shared_ptr> wrapper( m, "MemoryPool"); + std::shared_ptr> wrapper( m, "_tools_MemoryPool"); wrapper - .def(py::init(), + .def(py::init(), py::arg("allocator"), py::arg("leading_bits_in_bin_id")=4 ) - .def("allocate", device_pool_allocate) - .def("__call__", device_pool_allocate) + .def("allocate", buffer_pool_allocate) + .def("__call__", buffer_pool_allocate) // undoc for now .DEF_SIMPLE_METHOD(set_trace) ; @@ -458,10 +622,64 @@ void pyopencl_expose_mempool(py::module &m) typedef pooled_buffer cls; py::class_( - m, "PooledBuffer"/* , py::no_init */) + m, "_tools_PooledBuffer"/* , py::no_init */) + .def("release", &cls::free) + // undocumented for now, for consistency with SVM + .def("bind_to_queue", [](cls &self, pyopencl::command_queue &queue) { /* no-op */ }) + .def("unbind_from_queue", [](cls &self) { /* no-op */ }) + ; + } + +#if PYOPENCL_CL_VERSION >= 0x2000 + { + typedef pyopencl::memory_pool cls; + + py::class_< + cls, /* boost::noncopyable, */ + std::shared_ptr> wrapper( m, "_tools_SVMemoryPool"); + wrapper + .def(py::init(), + py::arg("allocator"), + py::arg("leading_bits_in_bin_id")=4 + ) + .def("allocate", svm_pool_allocate) + .def("__call__", svm_pool_allocate) + // undoc for now + .DEF_SIMPLE_METHOD(set_trace) + ; + + expose_memory_pool(wrapper); + } + + { + typedef pooled_svm cls; + py::class_( + m, "_tools_PooledSVM"/* , py::no_init */) .def("release", &cls::free) + .def("__eq__", [](const cls &self, const cls &other) + { return self.svm_ptr() == other.svm_ptr(); }) + .def("__hash__", [](cls &self) { return (intptr_t) self.svm_ptr(); }) + .DEF_SIMPLE_METHOD(bind_to_queue) + .DEF_SIMPLE_METHOD(unbind_from_queue) + ; + } + + { + typedef svm_allocator cls; + py::class_ wrapper( + m, "_tools_SVMAllocator"); + wrapper + .def(py::init const &, cl_uint, cl_uint, pyopencl::command_queue *>(), + py::arg("context"), + py::arg("alignment"), + py::arg("flags")=CL_MEM_READ_WRITE, + py::arg("command_queue").none(true)=nullptr + ) + 
.def("__call__", svm_allocator_call) ; } +#endif } // vim: foldmethod=marker