From c1184a1877cf15e969c8ebed51282e4dcad945dd Mon Sep 17 00:00:00 2001
From: John S Bogaardt
Date: Fri, 14 Feb 2020 13:59:10 -0700
Subject: [PATCH] cupy_json compatibility

---
 README.md                                 | 28 +++++++++++------------
 chainladder/core/io.py                    | 22 +++++++++++++-----
 chainladder/core/slice.py                 |  2 +-
 chainladder/utils/tests/test_utilities.py |  2 +-
 chainladder/utils/utility_functions.py    | 11 ++++++---
 docs/tutorials/triangle-tutorial.ipynb    |  2 +-
 setup.py                                  |  2 +-
 7 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 4ca9cf78..d55f9dc4 100644
--- a/README.md
+++ b/README.md
@@ -13,20 +13,6 @@
 We figure an actuary who uses python has reasonable familiarity with pandas and
 scikit-learn, so they can spend as little mental energy as possible learning yet
 another API.
 
-##### Now with GPU support
-New in version `0.5.0` - `chainladder` now supports CUDA-based GPU computations by way of [CuPY](https://github.com/cupy/cupy). You can now swap `array_backend` between `numpy` and `cupy` to switch between CPU and GPU-based computations.
-
-Array backends can be set globally:
-```python
-import chainladder as cl
-cl.array_backend('cupy')
-```
-Alternatively, they can be set per `Triangle` instance.
-```python
-cl.Triangle(..., array_backend='cupy')
-```
-**Note** you must have a CUDA-enabled graphics card and [CuPY](https://github.com/cupy/cupy) installed to use the GPU backend.
-
 ## Documentation
 Please visit the [Documentation](https://chainladder-python.readthedocs.io/en/latest/) page for examples, how-tos, and source
@@ -54,3 +40,17 @@ Alternatively, install directly from github:
 
 Note: This package requires Python 3.5 and later, numpy 1.12.0 and later,
 pandas 0.23.0 and later, scikit-learn 0.18.0 and later.
+
+##### Now with GPU support
+New in version `0.5.0` - `chainladder` now supports CUDA-based GPU computations by way of [CuPY](https://github.com/cupy/cupy). You can now swap `array_backend` between `numpy` and `cupy` to switch between CPU and GPU-based computations.
+
+Array backends can be set globally:
+```python
+import chainladder as cl
+cl.array_backend('cupy')
+```
+Alternatively, they can be set per `Triangle` instance.
+```python
+cl.Triangle(..., array_backend='cupy')
+```
+**Note** you must have a CUDA-enabled graphics card and [CuPY](https://github.com/cupy/cupy) installed to use the GPU backend.
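A small companion sketch to the README section above: because `cl.array_backend` is a global switch, a script can request the GPU backend only when CuPY is actually importable and fall back to `numpy` otherwise. This is an illustration built only from the helpers already shown in this patch (`cl.array_backend`, `cl.load_dataset`):

```python
import chainladder as cl

# Prefer the GPU backend when cupy is importable; otherwise stay on the CPU.
try:
    import cupy  # noqa: F401 -- only useful with a CUDA-enabled GPU
    cl.array_backend('cupy')
except ImportError:
    cl.array_backend('numpy')

# Triangles created after the switch use whichever backend was selected.
raa = cl.load_dataset('raa')
```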
diff --git a/chainladder/core/io.py b/chainladder/core/io.py
index 12f637ec..0f1c6ae7 100644
--- a/chainladder/core/io.py
+++ b/chainladder/core/io.py
@@ -22,7 +22,12 @@ def to_json(self):
         '''
         def sparse_out(tri):
             k, v, o, d = tri.shape
-            coo = coo_matrix(np.nan_to_num(tri.values.reshape((k*v*o, d))))
+            xp = cp.get_array_module(tri)
+            if xp == cp:
+                out = cp.asnumpy(tri)  # move GPU array to host memory
+            else:
+                out = tri
+            coo = coo_matrix(np.nan_to_num(out.reshape((k*v*o, d))))
             return json.dumps(dict(zip([str(item) for item in zip(coo.row, coo.col)],
                                        coo.data)))
         json_dict = {}
@@ -38,15 +43,20 @@ def sparse_out(tri):
             json_dict[attribute] = {
                 'dtype': str(getattr(self, attribute).dtype),
                 'array': getattr(self, attribute).tolist()}
-        if np.sum(np.nan_to_num(self.values)==0) / np.prod(self.shape) > 0.40:
+        xp = cp.get_array_module(self.values)
+        if xp == cp:
+            out = cp.asnumpy(self.values)
+        else:
+            out = self.values
+        if np.sum(np.nan_to_num(out)==0) / np.prod(self.shape) > 0.40:
             json_dict['values'] = {
-                'dtype': str(self.values.dtype),
-                'array': sparse_out(self.cum_to_incr()),
+                'dtype': str(out.dtype),
+                'array': sparse_out(self.cum_to_incr().values),
                 'sparse': True}
         else:
             json_dict['values'] = {
-                'dtype': str(self.values.dtype),
-                'array': self.values.tolist(),
+                'dtype': str(out.dtype),
+                'array': out.tolist(),
                 'sparse': False}
         json_dict['key_labels'] = self.key_labels
         json_dict['origin_grain'] = self.origin_grain
diff --git a/chainladder/core/slice.py b/chainladder/core/slice.py
index 69673b80..981488b3 100644
--- a/chainladder/core/slice.py
+++ b/chainladder/core/slice.py
@@ -23,7 +23,7 @@ def get_idx(self, idx):
         x_0 = list(pd.Series([item[0] for item in idx.values[:, 0]]).unique())
         x_1 = list(pd.Series([item[1] for item in idx.values[0, :]]).unique())
         obj.values = \
-            obj.values[self._contig_slice(x_0)][:, self._contig_slice(x_1)]
+            obj.values[self._contig_slice(x_0), ...][:, self._contig_slice(x_1), ...]
         obj.values[obj.values == 0] = np.nan
         return obj
 
diff --git a/chainladder/utils/tests/test_utilities.py b/chainladder/utils/tests/test_utilities.py
index 39fe5675..1205da96 100644
--- a/chainladder/utils/tests/test_utilities.py
+++ b/chainladder/utils/tests/test_utilities.py
@@ -26,7 +26,7 @@ def test_triangle_json_io():
 
 def test_json_for_val():
     x = cl.load_dataset('raa').dev_to_val().to_json()
-    cl.read_json(x) == cl.load_dataset('raa').dev_to_val()
+    assert cl.read_json(x) == cl.load_dataset('raa').dev_to_val()
 
 def test_estimator_json_io():
     assert cl.read_json(cl.Development().to_json()).get_params() == \
diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py
index 71746155..387ac06e 100644
--- a/chainladder/utils/utility_functions.py
+++ b/chainladder/utils/utility_functions.py
@@ -3,6 +3,7 @@
 # file, You can obtain one at https://mozilla.org/MPL/2.0/.
 import pandas as pd
 import numpy as np
+from chainladder.utils.cupy import cp
 from scipy.sparse import coo_matrix
 import joblib
 import json
@@ -48,7 +49,7 @@ def read_pickle(path):
     return joblib.load(path)
 
 
-def read_json(json_str):
+def read_json(json_str, array_backend=None):
     def sparse_in(json_str, dtype, shape):
         k, v, o, d = shape
         x = json.loads(json_str)
@@ -60,6 +61,9 @@ def sparse_in(json_str, dtype, shape):
         new[new==0] = np.nan
         return new
 
+    if array_backend is None:
+        from chainladder import ARRAY_BACKEND
+        array_backend = ARRAY_BACKEND
     json_dict = json.loads(json_str)
     if type(json_dict) is list:
         import chainladder as cl
@@ -68,7 +72,7 @@ def sparse_in(json_str, dtype, shape):
             cl.__dict__[item['__class__']]().set_params(**item['params']))
             for item in json_dict])
     elif 'kdims' in json_dict.keys():
-        tri = Triangle()
+        tri = Triangle(array_backend=array_backend)
         arrays = ['kdims', 'vdims', 'odims', 'ddims']
         for array in arrays:
             setattr(tri, array, np.array(
@@ -91,7 +95,8 @@ def sparse_in(json_str, dtype, shape):
             tri = tri.incr_to_cum()
         else:
             tri.values = np.array(json_dict['values']['array'], dtype=json_dict['values']['dtype'])
-
+        if array_backend == 'cupy':
+            tri.values = cp.array(tri.values)
         return tri
     else:
         import chainladder as cl
diff --git a/docs/tutorials/triangle-tutorial.ipynb b/docs/tutorials/triangle-tutorial.ipynb
index 28169824..14847926 100644
--- a/docs/tutorials/triangle-tutorial.ipynb
+++ b/docs/tutorials/triangle-tutorial.ipynb
@@ -8977,7 +8977,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "x.sum().to_clipboard() # Automatically converts to a pandas dataframe and puts in the clipbard for pasting in Excel"
+    "x.sum().to_clipboard() # Automatically converts to a pandas dataframe and puts in the clipboard for pasting in Excel"
   ]
  },
  {
diff --git a/setup.py b/setup.py
index 33ad068d..8e80e802 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
       version=version,
       maintainer='John Bogaardt',
       maintainer_email='jbogaardt@gmail.com',
-      packages=['{}.{}'.format(name, p) for p in find_packages(where=name)]+['chainladder'],
+      packages=['{}.{}'.format(name, p) for p in find_packages(where=name)]+[name],
       scripts=[],
       url=url,
       download_url='{}/archive/v{}.tar.gz'.format(url, version),
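To make the sparse path in `to_json` concrete, here is a small self-contained sketch of the same encoding idea using plain NumPy/SciPy on a toy array rather than a `Triangle`; the key formatting mirrors the `str((row, col))` scheme in the io.py hunk above:

```python
import json
import numpy as np
from scipy.sparse import coo_matrix

# Toy stand-in for a reshaped (k*v*o, d) block of incremental values.
values = np.array([[np.nan, 5.0, 0.0],
                   [2.0, np.nan, 0.0]])

# NaNs become zero, and COO keeps only the remaining non-zero cells.
coo = coo_matrix(np.nan_to_num(values))
encoded = json.dumps(dict(zip([str(item) for item in zip(coo.row, coo.col)],
                              coo.data)))
print(encoded)  # e.g. {"(0, 1)": 5.0, "(1, 0)": 2.0} on NumPy 1.x
```

Reading it back is the mirror image: allocate a zero array of the original shape, fill the listed cells, and turn the zeros back into NaN, which is essentially what `sparse_in` inside `read_json` does.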
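And a short round-trip sketch of what the io.py and utility_functions.py hunks, together with the updated `test_json_for_val`, are exercising. The `cupy` line is commented out because it assumes CuPY plus a CUDA device; everything else runs on the default `numpy` backend:

```python
import json
import chainladder as cl

tri = cl.load_dataset('raa')   # cumulative sample triangle
payload = tri.to_json()        # JSON string; values stored dense or as a COO-style dict

meta = json.loads(payload)['values']
print(meta['dtype'], meta['sparse'])   # 'sparse' is True when >40% of cells are zero or missing

# Round trip on the current backend, mirroring the updated test.
assert cl.read_json(payload) == tri

# With this patch, a backend can also be forced at read time:
# gpu_tri = cl.read_json(payload, array_backend='cupy')
```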