Skip to content

Commit

Permalink
bump version, merge pull request #17 from AMYPAD/devel
Browse files Browse the repository at this point in the history
  • Loading branch information
casperdcl authored Nov 1, 2021
2 parents baa3bf0 + 6118807 commit ef2a25b
Show file tree
Hide file tree
Showing 11 changed files with 168 additions and 31 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ jobs:
- run: pip install -U -e .[dev]
- run: pytest
- uses: codecov/codecov-action@v1
- name: compile -Wall
run: |
git clean -Xdf
pip install toml
python -c 'import toml; c=toml.load("pyproject.toml"); print("\0".join(c["build-system"]["requires"]), end="")' \
| xargs -0 pip install
python setup.py build -- -DCUVEC_DEBUG=1 \
-DCMAKE_CXX_FLAGS="-Wall -Wextra -Wpedantic -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type"
cuda:
if: github.event_name != 'pull_request' || github.repository_owner != 'AMYPAD'
name: CUDA py${{ matrix.python }}
Expand All @@ -71,6 +79,14 @@ jobs:
- run: pip install -U -e .[dev]
- run: pytest
- uses: codecov/codecov-action@v1
- name: compile -Wall
run: |
git clean -Xdf
pip install toml
python -c 'import toml; c=toml.load("pyproject.toml"); print("\0".join(c["build-system"]["requires"]), end="")' \
| xargs -0 pip install
python setup.py build -- -DCUVEC_DEBUG=1 \
-DCMAKE_CXX_FLAGS="-Wall -Wextra -Wpedantic -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type"
- name: Post Run setup-python
run: setup-python -p${{ matrix.python }} -Dr
if: ${{ always() }}
Expand Down
13 changes: 8 additions & 5 deletions cuvec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@
# data
'typecodes', 'vec_types'] # yapf: disable

from pathlib import Path

from pkg_resources import resource_filename
try: # py<3.9
import importlib_resources as resources
except ImportError:
from importlib import resources

try:
from .cuvec import dev_sync
Expand All @@ -37,6 +38,8 @@
else:
from .pycuvec import CuVec, asarray, copy, cu_copy, cu_zeros, typecodes, vec_types, zeros

p = resources.files('cuvec').resolve()
# for C++/CUDA/SWIG includes
include_path = p / 'include'
# for use in `cmake -DCMAKE_PREFIX_PATH=...`
cmake_prefix = Path(resource_filename(__name__, "cmake")).resolve()
include_path = Path(resource_filename(__name__, "include")).resolve()
cmake_prefix = p / 'cmake'
39 changes: 39 additions & 0 deletions cuvec/include/pycuvec.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -229,5 +229,44 @@ template <class T> PyCuVec<T> *PyCuVec_deepcopy(PyCuVec<T> *other) {
self->strides = other->strides;
return self;
}
/// Borrowed-pointer lookup: yields `getattr(o, 'cuvec', o)`, or `NULL` when
/// `o` is `NULL`/`None` or the attribute fetch fails. Net refcount change is zero.
template <class T> PyCuVec<T> *asPyCuVec(PyObject *o) {
  if (o == NULL || o == Py_None) return NULL;
  // no wrapper attribute: treat `o` itself as the PyCuVec
  if (!PyObject_HasAttrString(o, "cuvec")) return (PyCuVec<T> *)o;
  PyObject *inner = PyObject_GetAttrString(o, "cuvec");
  if (inner == NULL) return NULL;
  Py_DECREF(inner); // undo the reference taken by the lookup; caller gets a borrowed pointer
  return (PyCuVec<T> *)inner;
}
/// Overload for already-typed pointers: same `getattr(o, 'cuvec', o) or NULL`
/// semantics as the `PyObject *` version, without altering refcount.
template <class T> PyCuVec<T> *asPyCuVec(PyCuVec<T> *o) {
  PyObject *obj = (PyObject *)o;
  if (obj == NULL || obj == Py_None) return NULL;
  // no wrapper attribute: `o` is returned untouched
  if (!PyObject_HasAttrString(obj, "cuvec")) return o;
  PyObject *inner = PyObject_GetAttrString(obj, "cuvec");
  if (inner == NULL) return NULL;
  Py_DECREF(inner); // undo the reference taken by the lookup; caller gets a borrowed pointer
  return (PyCuVec<T> *)inner;
}
/// conversion functions for PyArg_Parse...(..., "O&", ...)
///
/// Each `ASCUVEC(T, typechar)` defines `int asPyCuVec_<typechar>(PyObject *, void **)`
/// which stores `asPyCuVec<T>(object)` into `*address`.
/// - `None` is accepted and stored as `NULL` (returns 1), so optional arguments work.
/// - Returns 0 (conversion failed) only when the lookup left a Python exception set,
///   as the "O&" converter protocol requires.
/// The functions are `inline` because this header may be included from several
/// translation units; a plain definition would then be defined more than once at link time.
#define ASCUVEC(T, typechar)                                                   \
  inline int asPyCuVec_##typechar(PyObject *object, void **address) {          \
    *address = (void *)asPyCuVec<T>(object);                                   \
    /* NULL without a pending exception just means `None` was passed */        \
    return (*address == NULL && PyErr_Occurred()) ? 0 : 1;                     \
  }
ASCUVEC(signed char, b)
ASCUVEC(unsigned char, B)
ASCUVEC(char, c)
ASCUVEC(short, h)
ASCUVEC(unsigned short, H)
ASCUVEC(int, i)
ASCUVEC(unsigned int, I)
ASCUVEC(long long, q)
ASCUVEC(unsigned long long, Q)
#ifdef _CUVEC_HALF
ASCUVEC(_CUVEC_HALF, e)
#endif
ASCUVEC(float, f)
ASCUVEC(double, d)

#endif // _PYCUVEC_H_
18 changes: 13 additions & 5 deletions cuvec/src/example_mod/example_mod.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ __global__ void _d_incr(float *dst, float *src, int X, int Y) {
}
#endif // CUVEC_DISABLE_CUDA
static PyObject *increment2d_f(PyObject *self, PyObject *args, PyObject *kwargs) {
PyCuVec<float> *dst = NULL;
PyCuVec<float> *src = NULL;
static const char *kwds[] = {"src", "output", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", (char **)kwds, (PyObject **)&src,
(PyObject **)&dst))
PyCuVec<float> *dst = NULL;
bool timing = false;
static const char *kwds[] = {"src", "output", "timing", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|Ob", (char **)kwds, &asPyCuVec_f, &src, &dst,
&timing))
return NULL;
dst = asPyCuVec(dst);
if (!src) return NULL;
std::vector<Py_ssize_t> &N = src->shape;
if (N.size() != 2) {
Expand All @@ -47,6 +49,7 @@ static PyObject *increment2d_f(PyObject *self, PyObject *args, PyObject *kwargs)
PyErr_SetString(PyExc_IndexError, "`output` must be same shape as `src`");
return NULL;
}
Py_INCREF((PyObject *)dst); // anticipating returning
} else {
dst = PyCuVec_zeros_like(src);
if (!dst) return NULL;
Expand All @@ -72,7 +75,12 @@ static PyObject *increment2d_f(PyObject *self, PyObject *args, PyObject *kwargs)
double kernel_ms = std::chrono::duration<double, std::milli>(eKern - eAlloc).count();
// fprintf(stderr, "%.3lf ms, %.3lf ms\n", alloc_ms, kernel_ms);
#endif
return Py_BuildValue("ddN", double(alloc_ms), double(kernel_ms), (PyObject *)dst);
if (timing) {
// hack: store times in first two elements of output
dst->vec[0] = alloc_ms;
dst->vec[1] = kernel_ms;
}
return (PyObject *)dst;
}
static PyMethodDef example_methods[] = {
{"increment2d_f", (PyCFunction)increment2d_f, METH_VARARGS | METH_KEYWORDS,
Expand Down
19 changes: 17 additions & 2 deletions cuvec/swigcuvec.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,28 @@ def copy(arr) -> CuVec:
return CuVec(cu_copy(arr))


def asarray(arr, dtype=None, order=None) -> CuVec:
def asarray(arr, dtype=None, order=None, ownership: str = 'warning') -> CuVec:
"""
Returns a `swigcuvec.CuVec` view of `arr`, avoiding memory copies if possible.
(`cuvec` equivalent of `numpy.asarray`).
Args:
ownership: logging level if `is_raw_cuvec(arr)`.
WARNING: `asarray()` should not be used on an existing reference, e.g.:
>>> res = asarray(some_swig_api_func(..., output=getattr(out, 'cuvec', None)))
`res.cuvec` and `out.cuvec` are now the same
yet garbage collected separately (dangling ptr).
Instead, use:
>>> res = some_swig_api_func(..., output=getattr(out, 'cuvec', None))
>>> res = out if hasattr(out, 'cuvec') else asarray(res)
NB: `asarray()` is safe if the raw cuvec was created in C++/SWIG, e.g.:
>>> res = asarray(some_swig_api_func(..., output=None), ownership='debug')
"""
if is_raw_cuvec(arr):
log.debug("taking ownership")
ownership = ownership.lower()
if ownership in {'critical', 'fatal', 'error'}:
raise IOError("Can't take ownership of existing cuvec (would create dangling ptr)")
getattr(log, ownership)("taking ownership")
arr = SWIGVector(None, None, arr)
if not isinstance(arr, np.ndarray) and is_raw_swvec(arr):
res = CuVec(arr)
Expand Down
26 changes: 22 additions & 4 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,24 @@ Requirements:
// PyCuVec<T> *PyCuVec_zeros(std::vector<Py_ssize_t> shape);
// PyCuVec<T> *PyCuVec_zeros_like(PyCuVec<T> *other);
// PyCuVec<T> *PyCuVec_deepcopy(PyCuVec<T> *other);
/// returns `NULL` if `self is None`, or
/// `getattr(self, 'cuvec', self)` otherwise:
// PyCuVec<T> *asPyCuVec(PyObject *self);
// PyCuVec<T> *asPyCuVec(PyCuVec<T> *self);
/// conversion functions for `PyArg_Parse*()`
/// e.g.: `PyArg_ParseTuple(args, "O&", &asPyCuVec_f, &obj)`:
// int asPyCuVec_b(PyObject *o, PyCuVec<signed char> **self);
// int asPyCuVec_B(PyObject *o, PyCuVec<unsigned char> **self);
// int asPyCuVec_c(PyObject *o, PyCuVec<char> **self);
// int asPyCuVec_h(PyObject *o, PyCuVec<short> **self);
// int asPyCuVec_H(PyObject *o, PyCuVec<unsigned short> **self);
// int asPyCuVec_i(PyObject *o, PyCuVec<int> **self);
// int asPyCuVec_I(PyObject *o, PyCuVec<unsigned int> **self);
// int asPyCuVec_q(PyObject *o, PyCuVec<long long> **self);
// int asPyCuVec_Q(PyObject *o, PyCuVec<unsigned long long> **self);
// int asPyCuVec_e(PyObject *o, PyCuVec<__half> **self);
// int asPyCuVec_f(PyObject *o, PyCuVec<float> **self);
// int asPyCuVec_d(PyObject *o, PyCuVec<double> **self);
```

=== "C++/SWIG API"
Expand Down Expand Up @@ -112,7 +130,7 @@ The following involve no memory copies.

=== "**CPython API** to **C++**"
```cpp
/// input: `PyObject *obj` (obtained from e.g.: `PyArg_ParseTuple()`, etc)
/// input: `PyObject *obj` (obtained from e.g.: `PyArg_Parse*()`, etc)
/// output: `CuVec<type> vec`
CuVec<float> &vec = ((PyCuVec<float> *)obj)->vec; // like std::vector<float>
std::vector<Py_ssize_t> &shape = ((PyCuVec<float> *)obj)->shape;
Expand Down Expand Up @@ -165,7 +183,7 @@ Python:
import cuvec, numpy, mymod
arr = cuvec.zeros((1337, 42, 7), "float32")
assert all(numpy.mean(arr, axis=(0, 1)) == 0)
print(cuvec.asarray(mymod.myfunc(arr.cuvec)).sum())
print(cuvec.asarray(mymod.myfunc(arr)).sum())
```

=== "Alternative: with CuVec & SWIG"
Expand Down Expand Up @@ -233,8 +251,8 @@ C++:
PyCuVec<float> *src = NULL;
PyCuVec<float> *dst = NULL;
static const char *kwds[] = {"src", "output", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", (char **)kwds,
(PyObject **)&src, (PyObject **)&dst))
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|O&", (char **)kwds,
&asPyCuVec_f, &src, &asPyCuVec_f, &dst))
return NULL;


Expand Down
4 changes: 3 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ setup_requires=
scikit-build>=0.11.0
cmake>=3.18
ninja
install_requires=setuptools; numpy
install_requires=
importlib_resources; python_version < "3.9"
numpy
python_requires=>=3.6
[options.extras_require]
dev=
Expand Down
14 changes: 14 additions & 0 deletions tests/test_cuvec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import cuvec as cu


def test_includes():
    """`cuvec.include_path` is a directory holding exactly the expected header/interface files."""
    include_dir = cu.include_path
    assert include_dir.is_dir()
    found = {entry.name for entry in include_dir.iterdir()}
    assert found == {'cuvec.cuh', 'pycuvec.cuh', 'cuvec.i'}


def test_cmake_prefix():
    """`cuvec.cmake_prefix` is a directory holding exactly the expected CMake package files."""
    prefix_dir = cu.cmake_prefix
    assert prefix_dir.is_dir()
    found = {entry.name for entry in prefix_dir.iterdir()}
    suffixes = ('Config', 'ConfigVersion', 'Targets', 'Targets-release')
    expected = {f'AMYPADcuvec{suffix}.cmake' for suffix in suffixes}
    assert found == expected
21 changes: 7 additions & 14 deletions tests/test_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,24 +67,17 @@ def test_perf(cu, ex, shape=(1337, 42), quiet=False):
t['assign'] = (time() - tic - overhead) * 1000

if not quiet:
if cu is sw:
t['warmup'], res = timer(ex.increment2d_f)(src.cuvec, None, True)
t['> create dst'], t['> kernel'] = cu.asarray(res)[0, :2]
else:
t['warmup'], (t['> create dst'], t['> kernel'], _) = timer(ex.increment2d_f)(src.cuvec)
if cu is sw:
t['call ext'], res = timer(ex.increment2d_f)(src.cuvec, None, True)
t['- create dst'], t['- kernel'] = None, None
t['view'], dst = timer(cu.asarray)(res)
t['- create dst'], t['- kernel'] = dst[0, :2]
else:
t['call ext'], (t['- create dst'], t['- kernel'], res) = timer(ex.increment2d_f)(src.cuvec)
t['view'], dst = timer(cu.asarray)(res)
t['warmup'], res = timer(ex.increment2d_f)(src.cuvec, None, True)
t['> create dst'], t['> kernel'] = cu.asarray(res)[0, :2]
t['call ext'], res = timer(ex.increment2d_f)(src.cuvec, None, True)
t['- create dst'], t['- kernel'] = None, None
t['view'], dst = timer(cu.asarray)(res)
t['- create dst'], t['- kernel'] = dst[0, :2]

if not quiet:
print("\n".join(f"{k.ljust(14)} | {v:.3f}" for k, v in t.items()))
assert (src + 1 == dst)[1:].all()
assert (src + 1 == dst)[0, 2 if cu is sw else 0:].all()
assert (src + 1 == dst)[0, 2:].all()
# even a fast kernel takes longer than API overhead
assert t['- kernel'] / (t['call ext'] - t['- create dst']) > 0.5
# API call should be <0.1 ms... but set a higher threshold of 2 ms
Expand Down
27 changes: 27 additions & 0 deletions tests/test_pycuvec.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,30 @@ def test_cuda_array_interface():
assert ndarr.dtype == v.dtype
with raises(AttributeError):
ndarr.__cuda_array_interface__


def test_increment():
    """`increment2d_f` adds one to `src`, both with an explicit `output` and without."""
    # `example_mod` is defined in ../cuvec/src/example_mod/
    from cuvec.example_mod import increment2d_f
    src = cu.zeros((1337, 42), 'f')
    assert (src == 0).all()
    # pass the raw `cuvec` as both input and output
    out = cu.asarray(increment2d_f(src.cuvec, src.cuvec))
    assert (src == 1).all()
    assert (out == 1).all()

    # the test expects `out` to reflect writes made through `src`
    src[:] = 0
    assert (src == 0).all()
    assert (out == 0).all()

    # pass the wrapper object itself, with no `output`
    out = cu.asarray(increment2d_f(src))
    assert (out == 1).all()


def test_increment_return():
    """The array returned by `increment2d_f(a, a)` stays readable after `a` is deleted."""
    from cuvec.example_mod import increment2d_f
    src = cu.zeros((1337, 42), 'f')
    assert (src == 0).all()
    # same wrapper object passed as both input and output
    out = cu.asarray(increment2d_f(src, src))
    assert (src == 1).all()
    # drop the original name; `out` must still hold the incremented data
    del src
    assert (out == 1).all()
2 changes: 2 additions & 0 deletions tests/test_swigcuvec.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ def test_asarray():
assert (s == v[1:]).all()
assert str(s.swvec) != str(v.swvec)
assert np.asarray(s.swvec).data != np.asarray(v.swvec).data
with raises(IOError):
cu.asarray(s.swvec.cuvec, ownership='error')


def test_cuda_array_interface():
Expand Down

0 comments on commit ef2a25b

Please sign in to comment.