bump version, merge pull request #17 from AMYPAD/devel

AMYPAD · Nov 1, 2021 · ef2a25b · ef2a25b
2 parents baa3bf0 + 6118807
commit ef2a25b
Show file tree

Hide file tree

Showing 11 changed files with 168 additions and 31 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -55,6 +55,14 @@ jobs:
     - run: pip install -U -e .[dev]
     - run: pytest
     - uses: codecov/codecov-action@v1
+    - name: compile -Wall
+      run: |
+        git clean -Xdf
+        pip install toml
+        python -c 'import toml; c=toml.load("pyproject.toml"); print("\0".join(c["build-system"]["requires"]), end="")' \
+          | xargs -0 pip install
+        python setup.py build -- -DCUVEC_DEBUG=1 \
+          -DCMAKE_CXX_FLAGS="-Wall -Wextra -Wpedantic -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type"
   cuda:
     if: github.event_name != 'pull_request' || github.repository_owner != 'AMYPAD'
     name: CUDA py${{ matrix.python }}
@@ -71,6 +79,14 @@ jobs:
     - run: pip install -U -e .[dev]
     - run: pytest
     - uses: codecov/codecov-action@v1
+    - name: compile -Wall
+      run: |
+        git clean -Xdf
+        pip install toml
+        python -c 'import toml; c=toml.load("pyproject.toml"); print("\0".join(c["build-system"]["requires"]), end="")' \
+          | xargs -0 pip install
+        python setup.py build -- -DCUVEC_DEBUG=1 \
+          -DCMAKE_CXX_FLAGS="-Wall -Wextra -Wpedantic -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type"
     - name: Post Run setup-python
       run: setup-python -p${{ matrix.python }} -Dr
       if: ${{ always() }}

diff --git a/cuvec/__init__.py b/cuvec/__init__.py
@@ -25,9 +25,10 @@
     # data
     'typecodes', 'vec_types'] # yapf: disable
 
-from pathlib import Path
-
-from pkg_resources import resource_filename
+try:          # py<3.9
+    import importlib_resources as resources
+except ImportError:
+    from importlib import resources
 
 try:
     from .cuvec import dev_sync
@@ -37,6 +38,8 @@
 else:
     from .pycuvec import CuVec, asarray, copy, cu_copy, cu_zeros, typecodes, vec_types, zeros
 
+p = resources.files('cuvec').resolve()
+# for C++/CUDA/SWIG includes
+include_path = p / 'include'
 # for use in `cmake -DCMAKE_PREFIX_PATH=...`
-cmake_prefix = Path(resource_filename(__name__, "cmake")).resolve()
-include_path = Path(resource_filename(__name__, "include")).resolve()
+cmake_prefix = p / 'cmake'
diff --git a/cuvec/include/pycuvec.cuh b/cuvec/include/pycuvec.cuh
@@ -229,5 +229,44 @@ template <class T> PyCuVec<T> *PyCuVec_deepcopy(PyCuVec<T> *other) {
   self->strides = other->strides;
   return self;
 }
+/// returns `getattr(o, 'cuvec', o) or NULL` without altering refcount
+template <class T> PyCuVec<T> *asPyCuVec(PyObject *o) {
+  if (!o || Py_None == o) return NULL;
+  if (PyObject_HasAttrString(o, "cuvec")) {
+    o = PyObject_GetAttrString(o, "cuvec");
+    if (!o) return NULL;
+    Py_DECREF(o);
+  }
+  return (PyCuVec<T> *)o;
+}
+template <class T> PyCuVec<T> *asPyCuVec(PyCuVec<T> *o) {
+  if (!o || Py_None == (PyObject *)o) return NULL;
+  if (PyObject_HasAttrString((PyObject *)o, "cuvec")) {
+    o = (PyCuVec<T> *)PyObject_GetAttrString((PyObject *)o, "cuvec");
+    if (!o) return NULL;
+    Py_DECREF((PyObject *)o);
+  }
+  return o;
+}
+/// conversion functions for PyArg_Parse...(..., "O&", ...)
+#define ASCUVEC(T, typechar)                                                                      \
+  int asPyCuVec_##typechar(PyObject *object, void **address) {                                    \
+    *address = (void *)asPyCuVec<T>(object);                                                      \
+    return 1;                                                                                     \
+  }
+ASCUVEC(signed char, b)
+ASCUVEC(unsigned char, B)
+ASCUVEC(char, c)
+ASCUVEC(short, h)
+ASCUVEC(unsigned short, H)
+ASCUVEC(int, i)
+ASCUVEC(unsigned int, I)
+ASCUVEC(long long, q)
+ASCUVEC(unsigned long long, Q)
+#ifdef _CUVEC_HALF
+ASCUVEC(_CUVEC_HALF, e)
+#endif
+ASCUVEC(float, f)
+ASCUVEC(double, d)
 
 #endif // _PYCUVEC_H_
diff --git a/cuvec/src/example_mod/example_mod.cu b/cuvec/src/example_mod/example_mod.cu
@@ -19,12 +19,14 @@ __global__ void _d_incr(float *dst, float *src, int X, int Y) {
 }
 #endif // CUVEC_DISABLE_CUDA
 static PyObject *increment2d_f(PyObject *self, PyObject *args, PyObject *kwargs) {
-  PyCuVec<float> *dst = NULL;
   PyCuVec<float> *src = NULL;
-  static const char *kwds[] = {"src", "output", NULL};
-  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", (char **)kwds, (PyObject **)&src,
-                                   (PyObject **)&dst))
+  PyCuVec<float> *dst = NULL;
+  bool timing = false;
+  static const char *kwds[] = {"src", "output", "timing", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|Ob", (char **)kwds, &asPyCuVec_f, &src, &dst,
+                                   &timing))
     return NULL;
+  dst = asPyCuVec(dst);
   if (!src) return NULL;
   std::vector<Py_ssize_t> &N = src->shape;
   if (N.size() != 2) {
@@ -47,6 +49,7 @@ static PyObject *increment2d_f(PyObject *self, PyObject *args, PyObject *kwargs)
       PyErr_SetString(PyExc_IndexError, "`output` must be same shape as `src`");
       return NULL;
     }
+    Py_INCREF((PyObject *)dst); // anticipating returning
   } else {
     dst = PyCuVec_zeros_like(src);
     if (!dst) return NULL;
@@ -72,7 +75,12 @@ static PyObject *increment2d_f(PyObject *self, PyObject *args, PyObject *kwargs)
   double kernel_ms = std::chrono::duration<double, std::milli>(eKern - eAlloc).count();
 // fprintf(stderr, "%.3lf ms, %.3lf ms\n", alloc_ms, kernel_ms);
 #endif
-  return Py_BuildValue("ddN", double(alloc_ms), double(kernel_ms), (PyObject *)dst);
+  if (timing) {
+    // hack: store times in first two elements of output
+    dst->vec[0] = alloc_ms;
+    dst->vec[1] = kernel_ms;
+  }
+  return (PyObject *)dst;
 }
 static PyMethodDef example_methods[] = {
     {"increment2d_f", (PyCFunction)increment2d_f, METH_VARARGS | METH_KEYWORDS,

diff --git a/cuvec/swigcuvec.py b/cuvec/swigcuvec.py
@@ -172,13 +172,28 @@ def copy(arr) -> CuVec:
     return CuVec(cu_copy(arr))
 
 
-def asarray(arr, dtype=None, order=None) -> CuVec:
+def asarray(arr, dtype=None, order=None, ownership: str = 'warning') -> CuVec:
     """
     Returns a `swigcuvec.CuVec` view of `arr`, avoiding memory copies if possible.
     (`cuvec` equivalent of `numpy.asarray`).
+
+    Args:
+      ownership: logging level if `is_raw_cuvec(arr)`.
+        WARNING: `asarray()` should not be used on an existing reference, e.g.:
+        >>> res = asarray(some_swig_api_func(..., output=getattr(out, 'cuvec', None)))
+        `res.cuvec` and `out.cuvec` are now the same
+        yet garbage collected separately (dangling ptr).
+        Instead, use:
+        >>> res = some_swig_api_func(..., output=getattr(out, 'cuvec', None))
+        >>> res = out if hasattr(out, 'cuvec') else asarray(res)
+        NB: `asarray()` is safe if the raw cuvec was created in C++/SWIG, e.g.:
+        >>> res = asarray(some_swig_api_func(..., output=None), ownership='debug')
     """
     if is_raw_cuvec(arr):
-        log.debug("taking ownership")
+        ownership = ownership.lower()
+        if ownership in {'critical', 'fatal', 'error'}:
+            raise IOError("Can't take ownership of existing cuvec (would create dangling ptr)")
+        getattr(log, ownership)("taking ownership")
         arr = SWIGVector(None, None, arr)
     if not isinstance(arr, np.ndarray) and is_raw_swvec(arr):
         res = CuVec(arr)

diff --git a/docs/index.md b/docs/index.md
@@ -72,6 +72,24 @@ Requirements:
     // PyCuVec<T> *PyCuVec_zeros(std::vector<Py_ssize_t> shape);
     // PyCuVec<T> *PyCuVec_zeros_like(PyCuVec<T> *other);
     // PyCuVec<T> *PyCuVec_deepcopy(PyCuVec<T> *other);
+    /// returns `NULL` if `self is None`, or
+    /// `getattr(self, 'cuvec', self)` otherwise:
+    // PyCuVec<T> *asPyCuVec(PyObject *self);
+    // PyCuVec<T> *asPyCuVec(PyCuVec<T> *self);
+    /// conversion functions for `PyArg_Parse*()`
+    /// e.g.: `PyArg_ParseTuple(args, "O&", &asPyCuVec_f, &obj)`:
+    // int asPyCuVec_b(PyObject *o, PyCuVec<signed char> **self);
+    // int asPyCuVec_B(PyObject *o, PyCuVec<unsigned char> **self);
+    // int asPyCuVec_c(PyObject *o, PyCuVec<char> **self);
+    // int asPyCuVec_h(PyObject *o, PyCuVec<short> **self);
+    // int asPyCuVec_H(PyObject *o, PyCuVec<unsigned short> **self);
+    // int asPyCuVec_i(PyObject *o, PyCuVec<int> **self);
+    // int asPyCuVec_I(PyObject *o, PyCuVec<unsigned int> **self);
+    // int asPyCuVec_q(PyObject *o, PyCuVec<long long> **self);
+    // int asPyCuVec_Q(PyObject *o, PyCuVec<unsigned long long> **self);
+    // int asPyCuVec_e(PyObject *o, PyCuVec<__half> **self);
+    // int asPyCuVec_f(PyObject *o, PyCuVec<float> **self);
+    // int asPyCuVec_d(PyObject *o, PyCuVec<double> **self);
     ```
 
 === "C++/SWIG API"
@@ -112,7 +130,7 @@ The following involve no memory copies.
 
 === "**CPython API** to **C++**"
     ```cpp
-    /// input: `PyObject *obj` (obtained from e.g.: `PyArg_ParseTuple()`, etc)
+    /// input: `PyObject *obj` (obtained from e.g.: `PyArg_Parse*()`, etc)
     /// output: `CuVec<type> vec`
     CuVec<float> &vec = ((PyCuVec<float> *)obj)->vec; // like std::vector<float>
     std::vector<Py_ssize_t> &shape = ((PyCuVec<float> *)obj)->shape;
@@ -165,7 +183,7 @@ Python:
     import cuvec, numpy, mymod
     arr = cuvec.zeros((1337, 42, 7), "float32")
     assert all(numpy.mean(arr, axis=(0, 1)) == 0)
-    print(cuvec.asarray(mymod.myfunc(arr.cuvec)).sum())
+    print(cuvec.asarray(mymod.myfunc(arr)).sum())
     ```
 
 === "Alternative: with CuVec & SWIG"
@@ -233,8 +251,8 @@ C++:
       PyCuVec<float> *src = NULL;
       PyCuVec<float> *dst = NULL;
       static const char *kwds[] = {"src", "output", NULL};
-      if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", (char **)kwds,
-                                       (PyObject **)&src, (PyObject **)&dst))
+      if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|O&", (char **)kwds,
+                                      &asPyCuVec_f, &src, &asPyCuVec_f, &dst))
         return NULL;
 
 

diff --git a/setup.cfg b/setup.cfg
@@ -42,7 +42,9 @@ setup_requires=
     scikit-build>=0.11.0
     cmake>=3.18
     ninja
-install_requires=setuptools; numpy
+install_requires=
+    importlib_resources; python_version < "3.9"
+    numpy
 python_requires=>=3.6
 [options.extras_require]
 dev=

diff --git a/tests/test_cuvec.py b/tests/test_cuvec.py
@@ -0,0 +1,14 @@
+import cuvec as cu
+
+
+def test_includes():
+    assert cu.include_path.is_dir()
+    assert {i.name for i in cu.include_path.iterdir()} == {'cuvec.cuh', 'pycuvec.cuh', 'cuvec.i'}
+
+
+def test_cmake_prefix():
+    assert cu.cmake_prefix.is_dir()
+    assert {i.name
+            for i in cu.cmake_prefix.iterdir()} == {
+                f'AMYPADcuvec{i}.cmake'
+                for i in ('Config', 'ConfigVersion', 'Targets', 'Targets-release')}
diff --git a/tests/test_perf.py b/tests/test_perf.py
@@ -67,24 +67,17 @@ def test_perf(cu, ex, shape=(1337, 42), quiet=False):
     t['assign'] = (time() - tic - overhead) * 1000
 
     if not quiet:
-        if cu is sw:
-            t['warmup'], res = timer(ex.increment2d_f)(src.cuvec, None, True)
-            t['> create dst'], t['> kernel'] = cu.asarray(res)[0, :2]
-        else:
-            t['warmup'], (t['> create dst'], t['> kernel'], _) = timer(ex.increment2d_f)(src.cuvec)
-    if cu is sw:
-        t['call ext'], res = timer(ex.increment2d_f)(src.cuvec, None, True)
-        t['- create dst'], t['- kernel'] = None, None
-        t['view'], dst = timer(cu.asarray)(res)
-        t['- create dst'], t['- kernel'] = dst[0, :2]
-    else:
-        t['call ext'], (t['- create dst'], t['- kernel'], res) = timer(ex.increment2d_f)(src.cuvec)
-        t['view'], dst = timer(cu.asarray)(res)
+        t['warmup'], res = timer(ex.increment2d_f)(src.cuvec, None, True)
+        t['> create dst'], t['> kernel'] = cu.asarray(res)[0, :2]
+    t['call ext'], res = timer(ex.increment2d_f)(src.cuvec, None, True)
+    t['- create dst'], t['- kernel'] = None, None
+    t['view'], dst = timer(cu.asarray)(res)
+    t['- create dst'], t['- kernel'] = dst[0, :2]
 
     if not quiet:
         print("\n".join(f"{k.ljust(14)} | {v:.3f}" for k, v in t.items()))
     assert (src + 1 == dst)[1:].all()
-    assert (src + 1 == dst)[0, 2 if cu is sw else 0:].all()
+    assert (src + 1 == dst)[0, 2:].all()
     # even a fast kernel takes longer than API overhead
     assert t['- kernel'] / (t['call ext'] - t['- create dst']) > 0.5
     # API call should be <0.1 ms... but set a higher threshold of 2 ms

diff --git a/tests/test_pycuvec.py b/tests/test_pycuvec.py
@@ -123,3 +123,30 @@ def test_cuda_array_interface():
     assert ndarr.dtype == v.dtype
     with raises(AttributeError):
         ndarr.__cuda_array_interface__
+
+
+def test_increment():
+    # `example_mod` is defined in ../cuvec/src/example_mod/
+    from cuvec.example_mod import increment2d_f
+    a = cu.zeros((1337, 42), 'f')
+    assert (a == 0).all()
+    res = cu.asarray(increment2d_f(a.cuvec, a.cuvec))
+    assert (a == 1).all()
+    assert (res == 1).all()
+
+    a[:] = 0
+    assert (a == 0).all()
+    assert (res == 0).all()
+
+    res = cu.asarray(increment2d_f(a))
+    assert (res == 1).all()
+
+
+def test_increment_return():
+    from cuvec.example_mod import increment2d_f
+    a = cu.zeros((1337, 42), 'f')
+    assert (a == 0).all()
+    res = cu.asarray(increment2d_f(a, a))
+    assert (a == 1).all()
+    del a
+    assert (res == 1).all()
diff --git a/tests/test_swigcuvec.py b/tests/test_swigcuvec.py
@@ -109,6 +109,8 @@ def test_asarray():
     assert (s == v[1:]).all()
     assert str(s.swvec) != str(v.swvec)
     assert np.asarray(s.swvec).data != np.asarray(v.swvec).data
+    with raises(IOError):
+        cu.asarray(s.swvec.cuvec, ownership='error')
 
 
 def test_cuda_array_interface():