diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..531f9fa --- /dev/null +++ b/.pylintrc @@ -0,0 +1,21 @@ +[MESSAGES CONTROL] + +disable= + bad-continuation, + bad-whitespace, + invalid-name, + missing-module-docstring, + missing-class-docstring, + missing-function-docstring, + redefined-outer-name, + c-extension-no-member, + too-many-arguments, + too-many-instance-attributes, + too-many-locals, + invalid-unary-operand-type, + not-context-manager, + redefined-builtin + +[format] + +max-line-length=88 diff --git a/README.md b/README.md index 8301599..754647b 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,23 @@ You may see performance benefits from `fastmath` by installing Intel's short vec conda install -c numba icc_rt ``` +## Debugging + +Debugging is often simpler without `jit`ting. To disable `numba`, + +```bash +export NUMBA_DISABLE_JIT=1 +``` + +and re-enable with + +```bash +export NUMBA_DISABLE_JIT=0 +``` + +Be wary of setting `os.environ["NUMBA_DISABLE_JIT"] = "1"` from Python code - it only takes effect if it is set before `numba` (or any module that imports `numba`) is first imported, i.e. above the imports. + + ## Differences compared to Scikit-learn 1. All operations are done using reduced distances. E.g. provided `KDTree` implementations use squared distances rather than actual distances both for inputs and outputs. 
diff --git a/example/compare_ifp.py b/example/compare_ifp.py index d0d49d8..ae437cd 100644 --- a/example/compare_ifp.py +++ b/example/compare_ifp.py @@ -1,13 +1,8 @@ -import os - import numpy as np from numba_neighbors import binary_tree as bt from numba_neighbors import kd_tree as kd -os.environ["NUMBA_DISABLE_JIT"] = "1" - - N = 100 n = 50 D = 1 @@ -20,7 +15,7 @@ np.random.seed(124) data = np.random.uniform(size=(N, D)).astype(kd.FLOAT_TYPE) -# data.sort(axis=0) +data.sort(axis=0) print(data) tree = kd.KDTree(data, leaf_size=leaf_size) diff --git a/example/ifp_sample.py b/example/ifp_sample.py index 81cc2eb..4642406 100644 --- a/example/ifp_sample.py +++ b/example/ifp_sample.py @@ -1,15 +1,10 @@ -import os - import matplotlib.pyplot as plt import numpy as np from numba_neighbors import kd_tree as kd -os.environ["NUMBA_DISABLE_JIT"] = "1" - - N = 1024 -n = 256 +n = 70 D = 2 # rejection_r = 0.1 query_r = 0.1 @@ -22,7 +17,7 @@ data = np.random.uniform(size=(N, D)).astype(kd.FLOAT_TYPE) tree = kd.KDTree(data, leaf_size=leaf_size) -sample_result, query_result = tree.ifp_sample_query( +sample_result0, query_result0 = tree.ifp_sample_query( r2, tree.get_node_indices(), n, max_neighbors ) sample_result, query_result = tree.rejection_ifp_sample_query( @@ -31,13 +26,7 @@ def vis( - x0, - sample_indices, - query_result, - small_balls=True, - big_balls=False, - labels=False, - aspect=1, + x0, sample_indices, query_result, small_balls=True, big_balls=False, labels=False, ): x1 = x0[sample_indices] xn = x0[query_result.indices[0, : query_result.counts[0]]] @@ -69,5 +58,5 @@ def vis( ax.set_aspect(1) -vis(data, sample_result.indices, query_result) +vis(data, sample_result.indices, query_result, big_balls=False) plt.show() diff --git a/example/index_heap.py b/example/index_heap.py index 4bd5f1b..b07934b 100644 --- a/example/index_heap.py +++ b/example/index_heap.py @@ -1,12 +1,7 @@ -import os - import numpy as np from numba_neighbors.index_heap import padded_index_heap 
-os.environ["NUMBA_DISABLE_JIT"] = "1" - - heap = padded_index_heap(np.zeros((10,)), np.arange(10), 20) print(heap.pop()) print(heap.pop()) diff --git a/example/rejection_ifp_sample.py b/example/rejection_ifp_sample.py index bac9b30..fdaf204 100644 --- a/example/rejection_ifp_sample.py +++ b/example/rejection_ifp_sample.py @@ -1,12 +1,8 @@ -import os - import matplotlib.pyplot as plt import numpy as np from numba_neighbors import kd_tree as kd -os.environ["NUMBA_DISABLE_JIT"] = "1" - N = 1024 n = 128 D = 2 diff --git a/example/rejection_sample.py b/example/rejection_sample.py index 26637c2..4f09dac 100644 --- a/example/rejection_sample.py +++ b/example/rejection_sample.py @@ -1,13 +1,8 @@ -import os - import matplotlib.pyplot as plt import numpy as np from numba_neighbors import kd_tree as kd -os.environ["NUMBA_DISABLE_JIT"] = "1" - - N = 1024 n = N D = 2 diff --git a/numba_neighbors/binary_tree.py b/numba_neighbors/binary_tree.py index 0669cac..a749573 100644 --- a/numba_neighbors/binary_tree.py +++ b/numba_neighbors/binary_tree.py @@ -4,7 +4,7 @@ import numba as nb import numpy as np -from numba_neighbors import index_heap2 as ih +from numba_neighbors import index_heap as ih FASTMATH = True PARALLEL = os.environ.get("NUMBA_PARALLEL", "1") != "0" @@ -49,7 +49,9 @@ def dual_swap(darr, iarr, i1, i2): @nb.njit() -def _simultaneous_sort(priorities: np.ndarray, values: np.ndarray) -> None: +def _simultaneous_sort( # pylint:disable=too-many-branches + priorities: np.ndarray, values: np.ndarray +) -> None: """ Recursively sort the arrays according to priorities in place. 
@@ -234,7 +236,7 @@ def partition_node_indices( swap(node_indices, midindex, right) if midindex == split_index: break - elif midindex < split_index: + if midindex < split_index: left = midindex + 1 else: right = midindex - 1 @@ -300,67 +302,6 @@ class IFPSampleQueryResult(NamedTuple): query_result: QueryResult -# class TreeData(NamedTuple): -# n_levels: int -# n_nodes: int -# data: FloatArray -# idx_array: IntArray -# idx_start: IntArray -# idx_end: IntArray -# is_leaf: BoolArray - -# @nb.njit(inline='always', fastmath=FASTMATH) -# def min_rdist(node_bounds, i_node, x): -# """Compute the minimum reduced-distance between a point and a node""" -# rdist = 0.0 - -# for j in range(x.size): -# d_lo = node_bounds[0, i_node, j] - x[j] -# d_hi = x[j] - node_bounds[1, i_node, j] -# d = ((d_lo + abs(d_lo)) + (d_hi + abs(d_hi))) / 2 -# rdist += d * d - -# return rdist - -# @nb.njit(inline='always', fastmath=FASTMATH) -# def min_dist(node_bounds, i_node, pt): -# return pow(min_rdist(node_bounds, i_node, pt), 0.5) - -# @nb.njit(inline='always', fastmath=FASTMATH) -# def max_rdist(node_bounds, i_node, x): -# """Compute the maximum reduced-distance between a point and a node""" -# rdist = 0.0 - -# for j in range(x.ize): -# d_lo = abs(x[j] - node_bounds[0, i_node, j]) -# d_hi = abs(x[j] - node_bounds[1, i_node, j]) -# d = max(d_lo, d_hi) -# rdist += d * d - -# return rdist - -# @nb.njit(inline='always', fastmath=FASTMATH) -# def max_dist(node_bounds, i_node, x): -# """Compute the maximum distance between a point and a node""" -# return pow(max_rdist(node_bounds, i_node, x), 0.5) - -# @nb.njit(inline='always', fastmath=FASTMATH) -# def _min_max_rdist(node_bounds, i_node, x): -# """Compute the minimum and maximum distance between a point and a node""" - -# min_dist = 0.0 -# max_dist = 0.0 - -# for j in range(x.size): -# d_lo = node_bounds[0, i_node, j] - x[j] -# d_hi = x[j] - node_bounds[1, i_node, j] -# d = (d_lo + abs(d_lo)) + (d_hi + abs(d_hi)) -# min_dist += pow(0.5 * d, 2) -# 
max_dist += pow(max(abs(d_lo), abs(d_hi)), 2) -# -# return min_dist, max_dist - - @nb.njit(parallel=PARALLEL, inline="always") def arange(length, dtype=INT_TYPE): """Simple `np.arange` implementation without start/step.""" @@ -448,7 +389,6 @@ def _recursive_build( n_points = idx_end_value - idx_start_value n_mid = n_points // 2 idx_array_slice = idx_array[idx_start_value:idx_end_value] - data = data # initialize node data # self._init_node(i_node, idx_start, idx_end) @@ -618,7 +558,6 @@ def rejection_ifp_sample_query_prealloc( dists[count:], query_indices[count:], counts[count:], - consumed, min_dists, heap, data, @@ -760,7 +699,6 @@ def ifp_sample_query_prealloc( dists: FloatArray, query_indices: IntArray, counts: IntArray, - consumed: BoolArray, min_dists: FloatArray, # in_size, minimum distances heap: ih.IndexHeap, # heapified IndexHeap # ----- @@ -783,7 +721,7 @@ def ifp_sample_query_prealloc( Args: query_r: float, reduced query radius. start_nodes: int array, node indices of tree data. - sample_indices, dists, query_indices, counts, consumed, in_dists, heap: + sample_indices, dists, query_indices, counts, in_dists, heap: preallocated data *tree_data: data from the input BinaryTree eps: float, the amount by which min_dist must be different to saved @@ -801,7 +739,7 @@ def ifp_sample_query_prealloc( top_dist, index = heap.pop() min_dist = min_dists[index] if np.isfinite(min_dist): - diff = abs(min_dist + top_dist) # top dist is negative + diff = abs(min_dist + top_dist) # top_dist is negative if diff > eps: continue sample_indices[count] = index @@ -1333,7 +1271,7 @@ def tree_spec(float_type=FLOAT_TYPE, int_type=INT_TYPE, bool_type=BOOL_TYPE): ] -class BinaryTree(object): +class BinaryTree: """ Base class for binary trees. 
@@ -1603,25 +1541,23 @@ def ifp_sample_query_prealloc( dists: FloatArray, query_indices: IntArray, counts: IntArray, - consumed: BoolArray, min_dists: FloatArray, # in_size, minimum distances heap: ih.IndexHeap, # assumed to be heapified ) -> float: return ifp_sample_query_prealloc( - query_r, - start_nodes, - sample_indices, - dists, - query_indices, - counts, - consumed, - min_dists, - heap, - self.data, - self.idx_array, - self.idx_start, - self.idx_end, - self.is_leaf, + query_r=query_r, + start_nodes=start_nodes, + sample_indices=sample_indices, + dists=dists, + query_indices=query_indices, + counts=counts, + min_dists=min_dists, + heap=heap, + data=self.data, + idx_array=self.idx_array, + idx_start=self.idx_start, + idx_end=self.idx_end, + is_leaf=self.is_leaf, node_data=self.node_data, rdist=self.rdist, min_max_rdist=self.min_max_rdist, @@ -1635,7 +1571,6 @@ def ifp_sample_query( dists = np.full(shape, np.inf, dtype=self.float_type) query_indices = np.full(shape, self.n_data, dtype=self.int_type) counts = np.full((sample_size,), -1, dtype=self.int_type) - consumed = np.zeros((self.n_data,), dtype=self.bool_type) min_dists = np.full((self.n_data,), -np.inf, dtype=self.float_type) # heap = list(zip(min_dists, arange(self.n_data,))) @@ -1646,15 +1581,14 @@ def ifp_sample_query( ) min_dists *= -1 min_dist = self.ifp_sample_query_prealloc( - query_r, - start_nodes, - sample_indices, - dists, - query_indices, - counts, - consumed, - min_dists, - heap, + query_r=query_r, + start_nodes=start_nodes, + sample_indices=sample_indices, + dists=dists, + query_indices=query_indices, + counts=counts, + min_dists=min_dists, + heap=heap, ) return IFPSampleQueryResult( @@ -1685,22 +1619,22 @@ def rejection_ifp_sample_query_prealloc( should be within this reduced distance of a sampled point. 
""" return rejection_ifp_sample_query_prealloc( - rejection_r, - query_r, - start_nodes, - sample_indices, - dists, - query_indices, - counts, - consumed, - min_dists, - heap_priorities, - heap_indices, - self.data, - self.idx_array, - self.idx_start, - self.idx_end, - self.is_leaf, + rejection_r=rejection_r, + query_r=query_r, + start_nodes=start_nodes, + sample_indices=sample_indices, + dists=dists, + query_indices=query_indices, + counts=counts, + consumed=consumed, + min_dists=min_dists, + heap_priorities=heap_priorities, + heap_indices=heap_indices, + data=self.data, + idx_array=self.idx_array, + idx_start=self.idx_start, + idx_end=self.idx_end, + is_leaf=self.is_leaf, node_data=self.node_data, rdist=self.rdist, min_max_rdist=self.min_max_rdist, @@ -1748,17 +1682,17 @@ def rejection_ifp_sample_query( heap_indices = np.empty((max_heap_length,), dtype=self.int_type) min_dist = self.rejection_ifp_sample_query_prealloc( - rejection_r, - query_r, - start_nodes, - sample_indices, - dists, - query_indices, - counts, - consumed, - min_dists, - heap_priorities, - heap_indices, + rejection_r=rejection_r, + query_r=query_r, + start_nodes=start_nodes, + sample_indices=sample_indices, + dists=dists, + query_indices=query_indices, + counts=counts, + consumed=consumed, + min_dists=min_dists, + heap_priorities=heap_priorities, + heap_indices=heap_indices, ) return IFPSampleQueryResult( diff --git a/numba_neighbors/index_heap2.py b/numba_neighbors/index_heap2.py index 24a81ed..d2f2069 100644 --- a/numba_neighbors/index_heap2.py +++ b/numba_neighbors/index_heap2.py @@ -7,7 +7,8 @@ from typing import Tuple import numpy as np -from numba import jitclass, njit, types +from numba import njit, types +from numba.experimental import jitclass @jitclass( diff --git a/numba_neighbors/index_heap_test.py b/numba_neighbors/index_heap_test.py index 442fa8a..6c58c88 100644 --- a/numba_neighbors/index_heap_test.py +++ b/numba_neighbors/index_heap_test.py @@ -1,5 +1,4 @@ import heapq -import os 
import unittest import numpy as np @@ -7,9 +6,6 @@ from numba_neighbors import index_heap as ih from numba_neighbors import index_heap2 as ih2 -os.environ["NUMBA_DISABLE_JIT"] = "1" - - max_length = 100 length = 50 diff --git a/numba_neighbors/kd_tree_test.py b/numba_neighbors/kd_tree_test.py index 26e8fcb..fe3ca47 100644 --- a/numba_neighbors/kd_tree_test.py +++ b/numba_neighbors/kd_tree_test.py @@ -152,9 +152,9 @@ def test_ifp_sample_consistent(self): r2, r2, start_indices, sample_size, max_neighbors ) - np.testing.assert_equal(sr.indices, sr2.indices) - np.testing.assert_allclose(sr.min_dists, sr2.min_dists) np.testing.assert_allclose(sr.min_dist, sr2.min_dist) + np.testing.assert_allclose(sr.min_dists, sr2.min_dists) + np.testing.assert_equal(sr.indices, sr2.indices) np.testing.assert_allclose(qr.dists, qr2.dists) np.testing.assert_equal(qr.indices, qr2.indices)