From 82014e953194641ca0c7bcb7637b59d325251065 Mon Sep 17 00:00:00 2001 From: Dominic Jack Date: Mon, 11 May 2020 16:37:31 +1000 Subject: [PATCH] reformat --- .isort.cfg | 6 + benchmarks/binary_tree/find_node_split_dim.py | 12 +- benchmarks/binary_tree/simultaneous_sort.py | 15 +- benchmarks/index_heap.py | 19 +- benchmarks/kd_tree/build.py | 24 +- benchmarks/kd_tree/ifp_sample.py | 52 +- benchmarks/kd_tree/query_radius.py | 32 +- benchmarks/kd_tree/rejection_sample.py | 42 +- benchmarks/kd_tree/sample_query_sample.py | 100 ++- example/compare_ifp.py | 20 +- example/eager.py | 73 +- example/ifp_sample.py | 66 +- example/ifp_sample_3d.py | 23 +- example/index_heap.py | 4 +- example/rejection_ifp_sample.py | 60 +- example/rejection_sample.py | 47 +- numba_neighbors/benchmark_utils.py | 17 +- numba_neighbors/binary_tree.py | 837 ++++++++++-------- numba_neighbors/binary_tree_test.py | 23 +- numba_neighbors/index_heap2.py | 56 +- numba_neighbors/index_heap_test.py | 28 +- numba_neighbors/kd_tree.py | 2 - numba_neighbors/kd_tree_test.py | 55 +- setup.py | 25 +- 24 files changed, 852 insertions(+), 786 deletions(-) create mode 100644 .isort.cfg diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..ba2778d --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,6 @@ +[settings] +multi_line_output=3 +include_trailing_comma=True +force_grid_wrap=0 +use_parentheses=True +line_length=88 diff --git a/benchmarks/binary_tree/find_node_split_dim.py b/benchmarks/binary_tree/find_node_split_dim.py index 729c142..c3b3c23 100644 --- a/benchmarks/binary_tree/find_node_split_dim.py +++ b/benchmarks/binary_tree/find_node_split_dim.py @@ -1,11 +1,7 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import numpy as np -from numba_neighbors.benchmark_utils import run_benchmarks, benchmark + from numba_neighbors import binary_tree as bt -import sklearn.neighbors +from numba_neighbors.benchmark_utils import benchmark, run_benchmarks N = 2048 NS = 1024 @@ -29,7 +25,7 @@ def get_data(): # data, node_indices, n_features, n_points) -@benchmark('numba') +@benchmark("numba") def numba_impl(): data = get_data() return bt.find_node_split_dim(*data) @@ -40,7 +36,7 @@ def numpy_find(data, node_indices): return np.argmax(np.ptp(data)) -@benchmark('numpy') +@benchmark("numpy") def numpy_impl(): return numpy_find(*get_data()) diff --git a/benchmarks/binary_tree/simultaneous_sort.py b/benchmarks/binary_tree/simultaneous_sort.py index 227ff6f..d7ce6b8 100644 --- a/benchmarks/binary_tree/simultaneous_sort.py +++ b/benchmarks/binary_tree/simultaneous_sort.py @@ -1,12 +1,9 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import numpy as np -from numba_neighbors.benchmark_utils import run_benchmarks -from numba_neighbors import binary_tree as bt import sklearn.neighbors +from numba_neighbors import binary_tree as bt +from numba_neighbors.benchmark_utils import run_benchmarks + N = 2048 k = 32 @@ -44,9 +41,5 @@ def numpy_impl(): run_benchmarks( - 10, - 100, - ('sklearn', sklearn_impl), - ('numba', numba_impl), - ('numpy', numpy_impl), + 10, 100, ("sklearn", sklearn_impl), ("numba", numba_impl), ("numpy", numpy_impl), ) diff --git a/benchmarks/index_heap.py b/benchmarks/index_heap.py index 10df2c1..d2fa044 100644 --- a/benchmarks/index_heap.py +++ b/benchmarks/index_heap.py @@ -1,14 +1,11 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import heapq + +import numpy as np +from numba import njit + +from numba_neighbors.benchmark_utils import BenchmarkManager from numba_neighbors.index_heap import IndexHeap from numba_neighbors.index_heap2 import IndexHeap as IndexHeap2 -from numba_neighbors.index_heap import padded_index_heap -from numba_neighbors.benchmark_utils import BenchmarkManager -from numba import njit -import numpy as np length = 1024 max_length = 32 * length @@ -16,7 +13,7 @@ heapify_bm = BenchmarkManager() -@heapify_bm.benchmark('heapq') +@heapify_bm.benchmark("heapq") def heapq_impl(): np.random.seed(123) priorities = np.random.random(size=(length,)).astype(np.float32) @@ -25,7 +22,7 @@ def heapq_impl(): heapq.heapify(heap) -@heapify_bm.benchmark('index_heap') +@heapify_bm.benchmark("index_heap") @njit() def index_heap_impl(): np.random.seed(123) @@ -37,7 +34,7 @@ def index_heap_impl(): iheap.heapify() -@heapify_bm.benchmark('index_heap2') +@heapify_bm.benchmark("index_heap2") @njit() def index_heap2_impl(): np.random.seed(123) diff --git a/benchmarks/kd_tree/build.py b/benchmarks/kd_tree/build.py index 4da50a8..91f8ba4 100644 --- a/benchmarks/kd_tree/build.py +++ b/benchmarks/kd_tree/build.py @@ -1,13 +1,9 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import numpy as np -from numba_neighbors.benchmark_utils import run_benchmarks, benchmark +import sklearn.neighbors + from numba_neighbors import binary_tree as bt from numba_neighbors import kd_tree as kd -# from numba_neighbors import kd_tree2 as kd2 -import sklearn.neighbors +from numba_neighbors.benchmark_utils import benchmark, run_benchmarks N = 1024 D = 3 @@ -17,30 +13,24 @@ leaf_size = 16 -@benchmark('sklearn') +@benchmark("sklearn") def sklearn_impl(): return sklearn.neighbors.kd_tree.KDTree(data, leaf_size=leaf_size) -@benchmark('base') +@benchmark("base") def numba_impl(): return bt.get_tree_data(data, leaf_size=leaf_size) -@benchmark('BinaryTree') +@benchmark("BinaryTree") def binary_tree(): - # return kd.KDTree(data, leaf_size=leaf_size) return bt.BinaryTree(data, leaf_size=leaf_size) -@benchmark('KDTree') +@benchmark("KDTree") def kd_tree(): - # return kd.KDTree(data, leaf_size=leaf_size) return kd.KDTree(data, leaf_size=leaf_size) -# @benchmark('cls2') -# def numba2(): -# return kd2.KDTree(data, leaf_size=leaf_size) - run_benchmarks(10, 100) diff --git a/benchmarks/kd_tree/ifp_sample.py b/benchmarks/kd_tree/ifp_sample.py index 81da1cb..682c3c6 100644 --- a/benchmarks/kd_tree/ifp_sample.py +++ b/benchmarks/kd_tree/ifp_sample.py @@ -1,20 +1,9 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - # os.environ['NUMBA_DISABLE_JIT'] = '1' import numpy as np -from numba import njit -from numba_neighbors.benchmark_utils import run_benchmarks, benchmark -from numba_neighbors import kd_tree as kd + from dcbs.np_utils.ifp.sample_query import ifp_sample_and_query -import functools +from numba_neighbors import kd_tree as kd +from numba_neighbors.benchmark_utils import benchmark, run_benchmarks N = 1024 sample_size = 512 @@ -32,37 +21,48 @@ @benchmark() def ifp(): tree = kd.KDTree(data, leaf_size) - return tree.ifp_sample_query(query_r**2, tree.get_node_indices(), - sample_size, max_neighbors) + return tree.ifp_sample_query( + query_r ** 2, tree.get_node_indices(), sample_size, max_neighbors + ) @benchmark() def rejection_ifp(): tree = kd.KDTree(data, leaf_size) - return tree.rejection_ifp_sample_query(rejection_r**2, query_r**2, - tree.get_node_indices(), sample_size, - max_neighbors) + return tree.rejection_ifp_sample_query( + rejection_r ** 2, + query_r ** 2, + tree.get_node_indices(), + sample_size, + max_neighbors, + ) @benchmark() def ifp3(): tree = kd.KDTree3(data, leaf_size) - return tree.ifp_sample_query(query_r**2, tree.get_node_indices(), - sample_size, max_neighbors) + return tree.ifp_sample_query( + query_r ** 2, tree.get_node_indices(), sample_size, max_neighbors + ) @benchmark() def rejection_ifp3(): tree = kd.KDTree3(data, leaf_size) - return tree.rejection_ifp_sample_query(rejection_r**2, query_r**2, - tree.get_node_indices(), sample_size, - max_neighbors) + return tree.rejection_ifp_sample_query( + rejection_r ** 2, + query_r ** 2, + tree.get_node_indices(), + sample_size, + max_neighbors, + ) @benchmark() def base(): - return ifp_sample_and_query(data, query_r, sample_size, max_neighbors, - max_neighbors) + return ifp_sample_and_query( + data, query_r, sample_size, max_neighbors, max_neighbors + ) run_benchmarks(20, 100) diff --git a/benchmarks/kd_tree/query_radius.py b/benchmarks/kd_tree/query_radius.py index b47ed9e..785dd53 100644 --- a/benchmarks/kd_tree/query_radius.py +++ b/benchmarks/kd_tree/query_radius.py @@ -1,13 +1,9 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import numpy as np -from numba_neighbors.benchmark_utils import run_benchmarks, benchmark -from numba_neighbors import kd_tree as kd -# from numba_neighbors import kd_tree2 as kd2 import sklearn.neighbors +from numba_neighbors import kd_tree as kd +from numba_neighbors.benchmark_utils import benchmark, run_benchmarks + N = 1024 n = 256 D = 3 @@ -24,7 +20,7 @@ sk_tree = sklearn.neighbors.kd_tree.KDTree(data, leaf_size=leaf_size) -@benchmark('sklearn') +@benchmark("sklearn") def sklearn_impl(): return sk_tree.query_radius(X, r, return_distance=True) @@ -32,7 +28,7 @@ def sklearn_impl(): numba_tree = kd.KDTree(data, leaf_size=leaf_size) -@benchmark('numba_pre') +@benchmark("numba_pre") def numba_prealloc(): dists = np.full((n, max_neighbors), np.inf, dtype=np.float32) indices = np.zeros((n, max_neighbors), dtype=np.int64) @@ -45,20 +41,21 @@ def numba(): return numba_tree.query_radius(X, r2, max_neighbors) -@benchmark('numba_bu') +@benchmark("numba_bu") def numba_bottom_up(): start_nodes = numba_tree.get_node_indices()[X_indices] return numba_tree.query_radius_bottom_up(X, r2, start_nodes, max_neighbors) -@benchmark('numba_bu_pre') +@benchmark("numba_bu_pre") def numba_bottom_up_prealloc(): dists = np.full((n, max_neighbors), np.inf, dtype=np.float32) indices = np.zeros((n, max_neighbors), dtype=np.int64) counts = np.zeros((n,), dtype=np.int64) start_nodes = numba_tree.get_node_indices()[X_indices] - numba_tree.query_radius_bottom_up_prealloc(X, r2, start_nodes, dists, - indices, counts) + numba_tree.query_radius_bottom_up_prealloc( + X, r2, start_nodes, dists, indices, counts + ) numba_tree3 = kd.KDTree3(data, leaf_size=leaf_size) @@ -69,20 +66,21 @@ def numba3(): return numba_tree3.query_radius(X, r2, max_neighbors) -@benchmark('numba3_bu') +@benchmark("numba3_bu") def numba3_bottom_up(): start_nodes = numba_tree3.get_node_indices()[X_indices] return numba_tree3.query_radius_bottom_up(X, r2, start_nodes, max_neighbors) -@benchmark('numba3_bu_pre') +@benchmark("numba3_bu_pre") def numba3_bottom_up_prealloc(): dists = np.full((n, max_neighbors), np.inf, dtype=np.float32) indices = np.zeros((n, max_neighbors), dtype=np.int64) counts = np.zeros((n,), dtype=np.int64) start_nodes = numba_tree3.get_node_indices()[X_indices] - numba_tree3.query_radius_bottom_up_prealloc(X, r2, start_nodes, dists, - indices, counts) + numba_tree3.query_radius_bottom_up_prealloc( + X, r2, start_nodes, dists, indices, counts + ) # numba_tree2 = kd2.KDTree(data, leaf_size=leaf_size) diff --git a/benchmarks/kd_tree/rejection_sample.py b/benchmarks/kd_tree/rejection_sample.py index 779404a..0e92429 100644 --- a/benchmarks/kd_tree/rejection_sample.py +++ b/benchmarks/kd_tree/rejection_sample.py @@ -1,21 +1,11 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os +import heapq # os.environ['NUMBA_DISABLE_JIT'] = '1' import numpy as np -from numba import njit -from numba_neighbors.benchmark_utils import run_benchmarks, benchmark + from numba_neighbors import kd_tree as kd -from dcbs.sparse.sample import ragged_in_place_and_down_sample_query_np -import functools -import heapq +from numba_neighbors.benchmark_utils import benchmark, run_benchmarks + heapq.heappush N = 1024 @@ -31,20 +21,28 @@ data /= np.linalg.norm(data, axis=-1, keepdims=True) -@benchmark('numba') +@benchmark("numba") def numba_impl(): tree = kd.KDTree(data, leaf_size) - return tree.rejection_sample_query(rejection_r**2, query_r**2, - tree.get_node_indices(), max_sample_size, - max_neighbors) + return tree.rejection_sample_query( + rejection_r ** 2, + query_r ** 2, + tree.get_node_indices(), + max_sample_size, + max_neighbors, + ) -@benchmark('numba3') +@benchmark("numba3") def numba3_impl(): tree = kd.KDTree3(data, leaf_size) - return tree.rejection_sample_query(rejection_r**2, query_r**2, - tree.get_node_indices(), max_sample_size, - max_neighbors) + return tree.rejection_sample_query( + rejection_r ** 2, + query_r ** 2, + tree.get_node_indices(), + max_sample_size, + max_neighbors, + ) # @benchmark() diff --git a/benchmarks/kd_tree/sample_query_sample.py b/benchmarks/kd_tree/sample_query_sample.py index 698ff51..83e8480 100644 --- a/benchmarks/kd_tree/sample_query_sample.py +++ b/benchmarks/kd_tree/sample_query_sample.py @@ -1,22 +1,15 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os +import functools +import heapq # os.environ['NUMBA_DISABLE_JIT'] = '1' import numpy as np from numba import njit -from numba_neighbors.benchmark_utils import run_benchmarks, benchmark + +from dcbs.sparse.sample import ragged_in_place_and_down_sample_query_np from numba_neighbors import binary_tree as bt from numba_neighbors import kd_tree as kd -from dcbs.sparse.sample import ragged_in_place_and_down_sample_query_np -import functools -import heapq +from numba_neighbors.benchmark_utils import benchmark, run_benchmarks + heapq.heappush n0 = 1024 @@ -40,13 +33,15 @@ data = np.random.uniform(size=(n0, D)).astype(kd.FLOAT_TYPE) data /= np.linalg.norm(data, axis=-1, keepdims=True) -kwargs = dict(data=data, - leaf_size=leaf_size, - n1=n1, - r0=r0, - r1=r1, - max_neigh0=max_neigh0, - max_neigh1=max_neigh1) +kwargs = dict( + data=data, + leaf_size=leaf_size, + n1=n1, + r0=r0, + r1=r1, + max_neigh0=max_neigh0, + max_neigh1=max_neigh1, +) @njit() @@ -54,19 +49,20 @@ def numba_impl(data, leaf_size, n1, r0, r1, max_neigh0, max_neigh1): tree = kd.KDTree(data, leaf_size) start_nodes = tree.get_node_indices() - query0 = tree.query_radius_bottom_up(data, r0**2, start_nodes, max_neigh0) + query0 = tree.query_radius_bottom_up(data, r0 ** 2, start_nodes, max_neigh0) - sample = bt.rejection_sample_precomputed(query0.indices, query0.counts, n1, - tree.int_type, tree.bool_type) - sample_indices = sample.indices[:sample.count] - query1 = tree.query_radius_bottom_up(data[sample_indices], r1**2, - start_nodes[sample_indices], - max_neigh1) + sample = bt.rejection_sample_precomputed( + query0.indices, query0.counts, n1, tree.int_type, tree.bool_type + ) + sample_indices = sample.indices[: sample.count] + query1 = tree.query_radius_bottom_up( + data[sample_indices], r1 ** 2, start_nodes[sample_indices], max_neigh1 + ) return query0, sample, query1 -num_impl = benchmark('numba')(functools.partial(numba_impl, **kwargs)) +num_impl = benchmark("numba")(functools.partial(numba_impl, **kwargs)) @njit() @@ -74,30 +70,40 @@ def numba3_impl(data, leaf_size, n1, r0, r1, max_neigh0, max_neigh1): tree = kd.KDTree3(data, leaf_size) start_nodes = tree.get_node_indices() - query0 = tree.query_radius_bottom_up(data, r0**2, start_nodes, max_neigh0) - - sample = bt.rejection_sample_precomputed(query0.indices, query0.counts, n1, - tree.int_type, tree.bool_type) - sample_indices = sample.indices[:sample.count] - query1 = tree.query_radius_bottom_up(data[sample_indices], r1**2, - start_nodes[sample_indices], - max_neigh1) + query0 = tree.query_radius_bottom_up(data, r0 ** 2, start_nodes, max_neigh0) + + sample = bt.rejection_sample_precomputed( + query0.indices, query0.counts, n1, tree.int_type, tree.bool_type + ) + sample_indices = sample.indices[: sample.count] + query1 = tree.query_radius_bottom_up( + data[sample_indices], r1 ** 2, start_nodes[sample_indices], max_neigh1 + ) return query0, sample, query1 -benchmark('numba3')(functools.partial(numba3_impl, **kwargs)) +benchmark("numba3")(functools.partial(numba3_impl, **kwargs)) @benchmark() def original(): - (ip_dists, ip_indices, sample_indices, sample_size, ds_dists, - ds_indices) = ragged_in_place_and_down_sample_query_np( - data, n0, r0, max_neigh0, n1, r1, max_neigh1) - q0 = bt.QueryResult(ip_dists, ip_indices, - np.count_nonzero(np.isfinite(ip_dists), axis=1)) + ( + ip_dists, + ip_indices, + sample_indices, + sample_size, + ds_dists, + ds_indices, + ) = ragged_in_place_and_down_sample_query_np( + data, n0, r0, max_neigh0, n1, r1, max_neigh1 + ) + q0 = bt.QueryResult( + ip_dists, ip_indices, np.count_nonzero(np.isfinite(ip_dists), axis=1) + ) sample = bt.RejectionSampleResult(sample_indices, sample_size) - q1 = bt.QueryResult(ds_dists, ds_indices, - np.count_nonzero(np.isfinite(ds_dists), axis=1)) + q1 = bt.QueryResult( + ds_dists, ds_indices, np.count_nonzero(np.isfinite(ds_dists), axis=1) + ) return q0, sample, q1 @@ -106,13 +112,13 @@ def original(): print(np.max(q0.counts), np.mean(q0.counts)) print(s.count) -counts = q1.counts[:s.count] +counts = q1.counts[: s.count] print(np.max(counts), np.mean(counts)) orig_q0, orig_s, orig_q1 = original() -print('---') +print("---") print(np.max(orig_q0.counts), np.mean(orig_q0.counts)) print(orig_s.count) -counts = orig_q1.counts[:orig_s.count] +counts = orig_q1.counts[: orig_s.count] print(np.max(counts), np.mean(counts)) print(orig_q1.dists.shape) diff --git a/example/compare_ifp.py b/example/compare_ifp.py index 6ff613e..d0d49d8 100644 --- a/example/compare_ifp.py +++ b/example/compare_ifp.py @@ -1,17 +1,12 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import os -os.environ['NUMBA_DISABLE_JIT'] = '1' import numpy as np -from numba_neighbors import kd_tree as kd + from numba_neighbors import binary_tree as bt +from numba_neighbors import kd_tree as kd + +os.environ["NUMBA_DISABLE_JIT"] = "1" + N = 100 n = 50 @@ -21,7 +16,7 @@ max_neighbors = 100 leaf_size = 16 -r2 = query_r**2 +r2 = query_r ** 2 np.random.seed(124) data = np.random.uniform(size=(N, D)).astype(kd.FLOAT_TYPE) @@ -30,8 +25,7 @@ tree = kd.KDTree(data, leaf_size=leaf_size) -qr = tree.query_radius_bottom_up(data, r2, tree.get_node_indices(), - max_neighbors) +qr = tree.query_radius_bottom_up(data, r2, tree.get_node_indices(), max_neighbors) sr_rej = bt.rejection_ifp_sample_precomputed(qr.dists, qr.indices, qr.counts, n) print(sr_rej.indices) diff --git a/example/eager.py b/example/eager.py index 5f3a43a..c6e820a 100644 --- a/example/eager.py +++ b/example/eager.py @@ -1,18 +1,40 @@ import numpy as np -from numba import njit, prange -from numba import int64, float32, float64, void -from numba_neighbors.kd_tree import KDTree3 +from numba import float32, float64, int64, njit, prange, void + from numba_neighbors.binary_tree import PARALLEL +from numba_neighbors.kd_tree import KDTree3 -@njit(void(float32[:, :], int64, int64, float64, int64, int64, int64, int64[:], - float32[:, :], int64[:, :], int64[:]), - parallel=PARALLEL, - fastmath=True) -def ifp_sample_query_prealloc(coords: np.ndarray, valid_size: int, - out_size: int, radius: float, k_query: int, - k_return: int, leaf_size: int, sample_indices, - dists, neigh_indices, row_lengths): +@njit( + void( + float32[:, :], + int64, + int64, + float64, + int64, + int64, + int64, + int64[:], + float32[:, :], + int64[:, :], + int64[:], + ), + parallel=PARALLEL, + fastmath=True, +) +def ifp_sample_query_prealloc( + coords: np.ndarray, + valid_size: int, + out_size: int, + radius: float, + k_query: int, + k_return: int, + leaf_size: int, + sample_indices, + dists, + neigh_indices, + row_lengths, +): r2 = radius * radius coords = coords[:valid_size] tree = KDTree3(coords, leaf_size) @@ -29,19 +51,30 @@ def ifp_sample_query_prealloc(coords: np.ndarray, valid_size: int, k = min(valid_size, k_query) for i in prange(out_size): # pylint: disable=not-an-iterable neigh_indices[i] = i - tree.query_radius_bottom_up_prealloc(coords, r2, start_nodes, - dists[:valid_size, :k], - neigh_indices[:valid_size, :k], - row_lengths[:valid_size]) + tree.query_radius_bottom_up_prealloc( + coords, + r2, + start_nodes, + dists[:valid_size, :k], + neigh_indices[:valid_size, :k], + row_lengths[:valid_size], + ) out_size = valid_size else: # we have to do the sample consumed = np.zeros((out_size,), dtype=np.uint8) min_dists = np.full((valid_size,), np.inf, dtype=np.float32) - _ = tree.rejection_ifp_sample_query_prealloc(r2, r2, start_nodes, - sample_indices, dists, - neigh_indices, row_lengths, - consumed, min_dists) + _ = tree.rejection_ifp_sample_query_prealloc( + r2, + r2, + start_nodes, + sample_indices, + dists, + neigh_indices, + row_lengths, + consumed, + min_dists, + ) # sample_result, query_result = tree.ifp_sample_query( # r2, start_nodes, out_size, k_query) @@ -69,4 +102,4 @@ def ifp_sample_query_prealloc(coords: np.ndarray, valid_size: int, dis[k] = np.sqrt(dis[k]) -print('done') +print("done") diff --git a/example/ifp_sample.py b/example/ifp_sample.py index 3d22aad..81cc2eb 100644 --- a/example/ifp_sample.py +++ b/example/ifp_sample.py @@ -1,17 +1,12 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import os -os.environ['NUMBA_DISABLE_JIT'] = '1' +import matplotlib.pyplot as plt import numpy as np + from numba_neighbors import kd_tree as kd -import matplotlib.pyplot as plt + +os.environ["NUMBA_DISABLE_JIT"] = "1" + N = 1024 n = 256 @@ -21,27 +16,31 @@ max_neighbors = 64 leaf_size = 16 -r2 = query_r**2 +r2 = query_r ** 2 np.random.seed(124) data = np.random.uniform(size=(N, D)).astype(kd.FLOAT_TYPE) tree = kd.KDTree(data, leaf_size=leaf_size) -sample_result, query_result = tree.ifp_sample_query(r2, tree.get_node_indices(), - n, max_neighbors) +sample_result, query_result = tree.ifp_sample_query( + r2, tree.get_node_indices(), n, max_neighbors +) sample_result, query_result = tree.rejection_ifp_sample_query( - r2, r2, tree.get_node_indices(), n, max_neighbors) + r2, r2, tree.get_node_indices(), n, max_neighbors +) -def vis(x0, - sample_indices, - query_result, - small_balls=True, - big_balls=False, - labels=False, - aspect=1): +def vis( + x0, + sample_indices, + query_result, + small_balls=True, + big_balls=False, + labels=False, + aspect=1, +): x1 = x0[sample_indices] - xn = x0[query_result.indices[0, :query_result.counts[0]]] + xn = x0[query_result.indices[0, : query_result.counts[0]]] x10 = x1[0] x0 = x0.T x1 = x1.T @@ -52,26 +51,21 @@ def vis(x0, for x in x1.T: if small_balls: ax.add_patch( - plt.Circle(x, - radius=query_r / 2, - alpha=0.4, - fill=1, - color='red')) + plt.Circle(x, radius=query_r / 2, alpha=0.4, fill=1, color="red") + ) if big_balls: - ax.add_patch( - plt.Circle(x, radius=query_r, alpha=0.15, fill=1, color='red')) + ax.add_patch(plt.Circle(x, radius=query_r, alpha=0.15, fill=1, color="red")) if labels: for i, xi in enumerate(x0.T): ax.annotate(str(i), xi) - ax.scatter(*x0, c='blue', alpha=0.5, s=10) - ax.scatter(*x1, c='red', alpha=1, s=10) - ax.scatter(*xn, c='green', alpha=1, s=10) - ax.scatter(*x10, c='black', alpha=1, s=10) - ax.add_patch( - plt.Circle(x10, radius=query_r, alpha=0.15, fill=1, color='green')) - ax.axis('off') + ax.scatter(*x0, c="blue", alpha=0.5, s=10) + ax.scatter(*x1, c="red", alpha=1, s=10) + ax.scatter(*xn, c="green", alpha=1, s=10) + ax.scatter(*x10, c="black", alpha=1, s=10) + ax.add_patch(plt.Circle(x10, radius=query_r, alpha=0.15, fill=1, color="green")) + ax.axis("off") ax.set_aspect(1) diff --git a/example/ifp_sample_3d.py b/example/ifp_sample_3d.py index 9745e18..96c2cc9 100644 --- a/example/ifp_sample_3d.py +++ b/example/ifp_sample_3d.py @@ -1,8 +1,6 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import numpy as np +import trimesh + from numba_neighbors.kd_tree import KDTree N = 1024 @@ -14,15 +12,15 @@ coords = np.random.normal(size=(N, 3)).astype(np.float32) coords /= np.linalg.norm(coords, axis=-1)[:, np.newaxis] -r2 = query_r**2 +r2 = query_r ** 2 tree = KDTree(coords, leaf_size=leaf_size) sample_result, query_result = tree.rejection_ifp_sample_query( - r2, r2, tree.get_node_indices(), out_size, k_query) + r2, r2, tree.get_node_indices(), out_size, k_query +) # sample_result, query_result = tree.ifp_sample_query(r2, tree.get_node_indices(), # out_size, k_query) -print(query_result.dists[0, :query_result.counts[0]]) -import trimesh +print(query_result.dists[0, : query_result.counts[0]]) red = [[255, 0, 0]] green = [[0, 255, 0]] blue = [[0, 0, 255]] @@ -33,11 +31,14 @@ pc = trimesh.PointCloud(coords, colors=np.full((N, 3), 255, dtype=np.uint8)) scene = pc.scene() scene.add_geometry( - trimesh.PointCloud(sampled_coords, colors=np.tile(blue, (out_size, 1)))) + trimesh.PointCloud(sampled_coords, colors=np.tile(blue, (out_size, 1))) +) rl = query_result.counts[0] scene.add_geometry( - trimesh.PointCloud(coords[query_result.indices[0, :rl]], - colors=np.tile(green, (rl, 1)))) + trimesh.PointCloud( + coords[query_result.indices[0, :rl]], colors=np.tile(green, (rl, 1)) + ) +) scene.add_geometry(trimesh.PointCloud(sampled_coords[:1], colors=red)) # scene.add_geometry( # trimesh.primitives.Sphere(radius=radius, center=sampled_coords[0])) diff --git a/example/index_heap.py b/example/index_heap.py index 5abc022..4bd5f1b 100644 --- a/example/index_heap.py +++ b/example/index_heap.py @@ -1,10 +1,12 @@ import os -os.environ['NUMBA_DISABLE_JIT'] = '1' import numpy as np from numba_neighbors.index_heap import padded_index_heap +os.environ["NUMBA_DISABLE_JIT"] = "1" + + heap = padded_index_heap(np.zeros((10,)), np.arange(10), 20) print(heap.pop()) print(heap.pop()) diff --git a/example/rejection_ifp_sample.py b/example/rejection_ifp_sample.py index 0b42ce0..bac9b30 100644 --- a/example/rejection_ifp_sample.py +++ b/example/rejection_ifp_sample.py @@ -1,17 +1,11 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import os -os.environ['NUMBA_DISABLE_JIT'] = '1' +import matplotlib.pyplot as plt import numpy as np + from numba_neighbors import kd_tree as kd -import matplotlib.pyplot as plt + +os.environ["NUMBA_DISABLE_JIT"] = "1" N = 1024 n = 128 @@ -24,23 +18,26 @@ np.random.seed(124) data = np.random.uniform(size=(N, D)).astype(kd.FLOAT_TYPE) -r2 = query_r**2 +r2 = query_r ** 2 tree = kd.KDTree(data, leaf_size=leaf_size) sample_result, query_result = tree.rejection_ifp_sample_query( - r2, r2, tree.get_node_indices(), n, max_neighbors) + r2, r2, tree.get_node_indices(), n, max_neighbors +) print(np.max(query_result.counts)) print(sample_result.min_dist, np.max(sample_result.min_dists)) -def vis(x0, - sample_indices, - query_result, - small_balls=True, - big_balls=False, - labels=False, - aspect=1): +def vis( + x0, + sample_indices, + query_result, + small_balls=True, + big_balls=False, + labels=False, + aspect=1, +): x1 = x0[sample_indices] - xn = x0[query_result.indices[0, :query_result.counts[0]]] + xn = x0[query_result.indices[0, : query_result.counts[0]]] x10 = x1[0] x0 = x0.T x1 = x1.T @@ -51,26 +48,21 @@ def vis(x0, for x in x1.T: if small_balls: ax.add_patch( - plt.Circle(x, - radius=query_r / 2, - alpha=0.4, - fill=1, - color='red')) + plt.Circle(x, radius=query_r / 2, alpha=0.4, fill=1, color="red") + ) if big_balls: - ax.add_patch( - plt.Circle(x, radius=query_r, alpha=0.15, fill=1, color='red')) + ax.add_patch(plt.Circle(x, radius=query_r, alpha=0.15, fill=1, color="red")) if labels: for i, xi in enumerate(x0.T): ax.annotate(str(i), xi) - ax.scatter(*x0, c='blue', alpha=0.5, s=10) - ax.scatter(*x1, c='red', alpha=1, s=10) - ax.scatter(*xn, c='green', alpha=1, s=10) - ax.scatter(*x10, c='black', alpha=1, s=10) - ax.add_patch( - plt.Circle(x10, radius=query_r, alpha=0.15, fill=1, color='green')) - ax.axis('off') + ax.scatter(*x0, c="blue", alpha=0.5, s=10) + ax.scatter(*x1, c="red", alpha=1, s=10) + ax.scatter(*xn, c="green", alpha=1, s=10) + ax.scatter(*x10, c="black", alpha=1, s=10) + ax.add_patch(plt.Circle(x10, radius=query_r, alpha=0.15, fill=1, color="green")) + ax.axis("off") ax.set_aspect(1) diff --git a/example/rejection_sample.py b/example/rejection_sample.py index e97e801..26637c2 100644 --- a/example/rejection_sample.py +++ b/example/rejection_sample.py @@ -1,17 +1,12 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import os -os.environ['NUMBA_DISABLE_JIT'] = '1' +import matplotlib.pyplot as plt import numpy as np + from numba_neighbors import kd_tree as kd -import matplotlib.pyplot as plt + +os.environ["NUMBA_DISABLE_JIT"] = "1" + N = 1024 n = N @@ -28,17 +23,13 @@ tree = kd.KDTree(data, leaf_size=leaf_size) sample_result, query_result = tree.rejection_sample_query( - rejection_r**2, query_r**2, tree.get_node_indices(), n, max_neighbors) + rejection_r ** 2, query_r ** 2, tree.get_node_indices(), n, max_neighbors +) print(np.max(query_result.counts)) print(sample_result.count) -def vis(x0, - sample_indices, - small_balls=True, - big_balls=True, - labels=False, - aspect=1): +def vis(x0, sample_indices, small_balls=True, big_balls=True, labels=False, aspect=1): x1 = x0[sample_indices] x0 = x0.T x1 = x1.T @@ -49,28 +40,22 @@ def vis(x0, for x in x1.T: if small_balls: ax.add_patch( - plt.Circle(x, - radius=rejection_r / 2, - alpha=0.4, - fill=1, - color='red')) + plt.Circle(x, radius=rejection_r / 2, alpha=0.4, fill=1, color="red") + ) if big_balls: ax.add_patch( - plt.Circle(x, - radius=rejection_r, - alpha=0.15, - fill=1, - color='red')) + plt.Circle(x, radius=rejection_r, alpha=0.15, fill=1, color="red") + ) if labels: for i, xi in enumerate(x0.T): ax.annotate(str(i), xi) - ax.scatter(*x0, c='blue', alpha=0.5, s=10) - ax.scatter(*x1, c='red', alpha=1, s=10) - ax.axis('off') + ax.scatter(*x0, c="blue", alpha=0.5, s=10) + ax.scatter(*x1, c="red", alpha=1, s=10) + ax.axis("off") ax.set_aspect(1) -vis(data, sample_result.indices[:sample_result.count]) +vis(data, sample_result.indices[: sample_result.count]) plt.show() diff --git a/numba_neighbors/benchmark_utils.py b/numba_neighbors/benchmark_utils.py index cc7d74b..48f4759 100644 --- a/numba_neighbors/benchmark_utils.py +++ b/numba_neighbors/benchmark_utils.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from timeit import timeit + import numpy as np @@ -11,27 +9,24 @@ def run_benchmarks(burn_iters, num_iters, *names_and_fns): times = [] names, _ = zip(*names_and_fns) for name, fn in names_and_fns: - print('Burning in {}...'.format(name)) + print("Burning in {}...".format(name)) for _ in range(burn_iters): fn() - print('Benchmarking {}...'.format(name)) + print("Benchmarking {}...".format(name)) times.append(timeit(fn, number=num_iters) / num_iters) indices = np.argsort(times) t0 = times[indices[0]] for i in indices: - print('{:20}: {:.10f} ({:.2f})'.format(names[i], times[i], - times[i] / t0)) + print("{:20}: {:.10f} ({:.2f})".format(names[i], times[i], times[i] / t0)) class BenchmarkManager(object): - - def __init__(self, name='default'): + def __init__(self, name="default"): self.names_and_fns = [] self.name = name def benchmark(self, name=None): - def f(decorated): actual_name = decorated.__name__ if name is None else name self.names_and_fns.append((actual_name, decorated)) @@ -40,7 +35,7 @@ def f(decorated): return f def run_benchmarks(self, burn_iters, num_iters): - print('Running {} benchmarks...'.format(self.name)) + print("Running {} benchmarks...".format(self.name)) run_benchmarks(burn_iters, num_iters, *self.names_and_fns) diff --git a/numba_neighbors/binary_tree.py b/numba_neighbors/binary_tree.py index c3c971f..0669cac 100644 --- a/numba_neighbors/binary_tree.py +++ b/numba_neighbors/binary_tree.py @@ -1,15 +1,13 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import os -from typing import Any, Callable, NamedTuple, Tuple, TypeVar, Optional +from typing import Callable, NamedTuple, Optional, Tuple + import numba as nb import numpy as np + from numba_neighbors import index_heap2 as ih FASTMATH = True -PARALLEL = os.environ.get('NUMBA_PARALLEL', '1') != '0' +PARALLEL = os.environ.get("NUMBA_PARALLEL", "1") != "0" INT_TYPE = np.int64 INT_TYPE_T = nb.int64 @@ -30,7 +28,7 @@ MinMaxRDist = Callable[[NodeDataArray, FloatArray], Tuple[float, float]] -@nb.njit(inline='always') +@nb.njit(inline="always") def swap(arr, i1, i2): """Swap values at index i1 and i2 of arr.""" tmp = arr[i1] @@ -38,7 +36,7 @@ def swap(arr, i1, i2): arr[i2] = tmp -@nb.njit(inline='always') +@nb.njit(inline="always") def dual_swap(darr, iarr, i1, i2): """swap the values at inex i1 and i2 of both darr and iarr""" dtmp = darr[i1] @@ -128,23 +126,24 @@ def simultaneous_sort(priorities: np.ndarray, values: np.ndarray) -> None: priorities: 2D array values: ND array, N >= 2, where priorities.shape == values.shape[:2]. """ - assert (priorities.shape == values.shape[:2]) - assert (len(priorities.shape) == 2) + assert priorities.shape == values.shape[:2] + assert len(priorities.shape) == 2 for row in nb.prange(priorities.shape[0]): # pylint: disable=not-an-iterable _simultaneous_sort(priorities[row], values[row]) @nb.njit(parallel=PARALLEL) -def simultaneous_sort_partial(priorities: np.ndarray, values: np.ndarray, - counts: IntArray): +def simultaneous_sort_partial( + priorities: np.ndarray, values: np.ndarray, counts: IntArray +): """In-place simultaneous sort the given row of the arrays This python wrapper exists primarily to enable unit testing of the _simultaneous_sort C routine. """ - assert (priorities.shape == values.shape) - assert (len(priorities.shape) == 2) - assert (priorities.shape[:1] == counts.shape) + assert priorities.shape == values.shape + assert len(priorities.shape) == 2 + assert priorities.shape[:1] == counts.shape for row in nb.prange(priorities.shape[0]): # pylint: disable=not-an-iterable count = counts[row] _simultaneous_sort(priorities[row, :count], values[row, :count]) @@ -199,8 +198,9 @@ def find_node_split_dim(data: FloatArray, node_indices: IntArray) -> int: @nb.njit() -def partition_node_indices(data: FloatArray, node_indices: IntArray, - split_dim: int, split_index: int): +def partition_node_indices( + data: FloatArray, node_indices: IntArray, split_dim: int, split_index: int +): """ Partition points in the node into two equal-sized groups. @@ -361,7 +361,7 @@ class IFPSampleQueryResult(NamedTuple): # return min_dist, max_dist -@nb.njit(parallel=PARALLEL, inline='always') +@nb.njit(parallel=PARALLEL, inline="always") def arange(length, dtype=INT_TYPE): """Simple `np.arange` implementation without start/step.""" out = np.empty((length,), dtype=dtype) @@ -371,10 +371,9 @@ def arange(length, dtype=INT_TYPE): @nb.jit() -def create_tree_data(data: FloatArray, - leaf_size: int = 40, - int_type=INT_TYPE, - bool_type=BOOL_TYPE): +def create_tree_data( + data: FloatArray, leaf_size: int = 40, int_type=INT_TYPE, bool_type=BOOL_TYPE +): if data.size == 0: raise ValueError("X is an empty array") @@ -398,9 +397,14 @@ def create_tree_data(data: FloatArray, @nb.njit() -def fill_tree_data(data: FloatArray, leaf_size: int, idx_array: IntArray, - idx_start: IntArray, idx_end: IntArray, - is_leaf: BoolArray) -> None: +def fill_tree_data( + data: FloatArray, + leaf_size: int, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, +) -> None: """ Get data associated with BinaryTree. @@ -422,22 +426,23 @@ def fill_tree_data(data: FloatArray, leaf_size: int, idx_array: IntArray, # validate data n_data = data.shape[0] n_nodes = idx_start.size - _recursive_build(0, 0, n_data, leaf_size, n_nodes, data, idx_array, - idx_start, idx_end, is_leaf) + _recursive_build( + 0, 0, n_data, leaf_size, n_nodes, data, idx_array, idx_start, idx_end, is_leaf + ) @nb.njit() def _recursive_build( - i_node: int, - idx_start_value: int, - idx_end_value: int, - leaf_size: int, - n_nodes: int, - data: FloatArray, - idx_array: IntArray, - idx_start: IntArray, - idx_end: IntArray, - is_leaf: BoolArray, + i_node: int, + idx_start_value: int, + idx_end_value: int, + leaf_size: int, + n_nodes: int, + data: FloatArray, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, ): """Recursively build the tree.""" n_points = idx_end_value - idx_start_value @@ -457,7 +462,8 @@ def _recursive_build( # we'll proactively prevent memory errors, but raise a # warning saying we're doing so. raise Exception( - 'Internal memory layout is flawed: not enough nodes allocated') + "Internal memory layout is flawed: not enough nodes allocated" + ) # import warnings # warnings.warn("Internal: memory layout is flawed: " # "not enough nodes allocated") @@ -465,8 +471,7 @@ def _recursive_build( elif n_points < 2: # again, this shouldn't happen if our memory allocation # is correct. Raise a warning. - raise Exception( - 'Internal memory layout is flawed: too many nodes allocated') + raise Exception("Internal memory layout is flawed: too many nodes allocated") # import warnings # warnings.warn("Internal: memory layout is flawed: " # "too many nodes allocated") @@ -478,15 +483,33 @@ def _recursive_build( i_max = find_node_split_dim(data, idx_array_slice) partition_node_indices(data, idx_array_slice, i_max, n_mid) idx_mid_value = idx_start_value + n_mid - _recursive_build(2 * i_node + 1, idx_start_value, idx_mid_value, - leaf_size, n_nodes, data, idx_array, idx_start, - idx_end, is_leaf) - _recursive_build(2 * i_node + 2, idx_mid_value, idx_end_value, - leaf_size, n_nodes, data, idx_array, idx_start, - idx_end, is_leaf) + _recursive_build( + 2 * i_node + 1, + idx_start_value, + idx_mid_value, + leaf_size, + n_nodes, + data, + idx_array, + idx_start, + idx_end, + is_leaf, + ) + _recursive_build( + 2 * i_node + 2, + idx_mid_value, + idx_end_value, + leaf_size, + n_nodes, + data, + idx_array, + idx_start, + idx_end, + is_leaf, + ) -@nb.njit(inline='always') +@nb.njit(inline="always") def _update_min_dists(dists, query_indices, counts, count, min_dists): for i in range(count): c = counts[i] @@ -501,27 +524,27 @@ def _update_min_dists(dists, query_indices, counts, count, min_dists): @nb.njit() def rejection_ifp_sample_query_prealloc( - rejection_r: float, - query_r: float, - start_nodes: IntArray, - # ----- pre-allocated data - sample_indices: IntArray, - dists: FloatArray, - query_indices: IntArray, - counts: IntArray, - consumed: BoolArray, - min_dists: FloatArray, - heap_priorities: FloatArray, - heap_indices: IntArray, - # ----- tree data - data: FloatArray, - idx_array: IntArray, - idx_start: IntArray, - idx_end: IntArray, - is_leaf: BoolArray, - node_data: NodeDataArray, - rdist: RDist, - min_max_rdist: MinMaxRDist, + rejection_r: float, + query_r: float, + start_nodes: IntArray, + # ----- pre-allocated data + sample_indices: IntArray, + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + consumed: BoolArray, + min_dists: FloatArray, + heap_priorities: FloatArray, + heap_indices: IntArray, + # ----- tree data + data: FloatArray, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_data: NodeDataArray, + rdist: RDist, + min_max_rdist: MinMaxRDist, ) -> float: """ Rejection-iterative farthest point sampling and querying. @@ -611,14 +634,15 @@ def rejection_ifp_sample_query_prealloc( @nb.njit() def ifp_sample_precomputed_prealloc( - dists: FloatArray, - query_indices: IntArray, - counts: IntArray, - # --- precomputed data - sample_indices: IntArray, - min_dists: FloatArray, - heap, - eps: float = 1e-8): + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + # --- precomputed data + sample_indices: IntArray, + min_dists: FloatArray, + heap, + eps: float = 1e-8, +): count = 0 sample_size = sample_indices.size top_dist = -np.inf @@ -646,105 +670,111 @@ def ifp_sample_precomputed_prealloc( if count >= sample_size: break else: - raise RuntimeError('Should have broken...') + raise RuntimeError("Should have broken...") return -top_dist @nb.njit(fastmath=True) -def ifp_sample_precomputed(dists: FloatArray, - query_indices: IntArray, - counts: IntArray, - sample_size: int, - eps=1e-8) -> IFPSampleResult: +def ifp_sample_precomputed( + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + sample_size: int, + eps=1e-8, +) -> IFPSampleResult: in_size, max_counts = dists.shape int_type = query_indices.dtype sample_indices = np.empty((sample_size,), dtype=int_type) min_dists = np.full((in_size,), -np.inf, dtype=np.float32) - heap = ih.padded_index_heap(min_dists, arange(in_size, dtype=int_type), - sample_size * max_counts + in_size) + heap = ih.padded_index_heap( + min_dists, arange(in_size, dtype=int_type), sample_size * max_counts + in_size + ) min_dists *= -1 - min_dist = ifp_sample_precomputed_prealloc(dists, - query_indices, - counts, - sample_indices, - min_dists, - heap, - eps=eps) + min_dist = ifp_sample_precomputed_prealloc( + dists, query_indices, counts, sample_indices, min_dists, heap, eps=eps + ) return IFPSampleResult(sample_indices, min_dists, min_dist) @nb.njit(fastmath=True) def rejection_ifp_sample_precomputed_prealloc( - dists: FloatArray, - query_indices: IntArray, - counts: IntArray, - # -- prealloc - sample_indices: IntArray, - min_dists: FloatArray, - consumed: BoolArray, - eps: float = 1e-8) -> float: + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + # -- prealloc + sample_indices: IntArray, + min_dists: FloatArray, + consumed: BoolArray, + eps: float = 1e-8, +) -> float: in_size, max_counts = dists.shape - count = rejection_sample_precomputed_prealloc(query_indices, counts, - sample_indices, consumed) + count = rejection_sample_precomputed_prealloc( + query_indices, counts, sample_indices, consumed + ) si = sample_indices[:count] - _update_min_dists(dists[si], query_indices[si], counts[si], count, - min_dists) + _update_min_dists(dists[si], query_indices[si], counts[si], count, min_dists) if count == sample_indices.size: return np.inf min_dists *= -1 - heap = ih.padded_index_heap(min_dists, - arange(in_size, dtype=sample_indices.dtype), - sample_indices.size * max_counts + in_size) + heap = ih.padded_index_heap( + min_dists, + arange(in_size, dtype=sample_indices.dtype), + sample_indices.size * max_counts + in_size, + ) heap.heapify() min_dists *= -1 - min_dist = ifp_sample_precomputed_prealloc(dists, query_indices, counts, - sample_indices[count:], - min_dists, heap, eps) + min_dist = ifp_sample_precomputed_prealloc( + dists, query_indices, counts, sample_indices[count:], min_dists, heap, eps + ) return min_dist @nb.njit(fastmath=True) -def rejection_ifp_sample_precomputed(dists: FloatArray, - query_indices: IntArray, - counts: IntArray, - sample_size: int, - bool_type=BOOL_TYPE, - eps=1e-8) -> IFPSampleResult: +def rejection_ifp_sample_precomputed( + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + sample_size: int, + bool_type=BOOL_TYPE, + eps=1e-8, +) -> IFPSampleResult: in_size = counts.size sample_indices = np.empty((sample_size,), dtype=query_indices.dtype) min_dists = np.full((in_size,), np.inf, dtype=dists.dtype) consumed = np.zeros((in_size,), dtype=bool_type) min_dist = rejection_ifp_sample_precomputed_prealloc( - dists, query_indices, counts, sample_indices, min_dists, consumed, eps) + dists, query_indices, counts, sample_indices, min_dists, consumed, eps + ) return IFPSampleResult(sample_indices, min_dists, min_dist) @nb.njit() def ifp_sample_query_prealloc( - query_r: float, - start_nodes: IntArray, - # ----- - # pre-allocated data - sample_indices: IntArray, - dists: FloatArray, - query_indices: IntArray, - counts: IntArray, - consumed: BoolArray, - min_dists: FloatArray, # in_size, minimum distances - heap: ih.IndexHeap, # heapified IndexHeap - # ----- - # tree data - data: FloatArray, - idx_array: IntArray, - idx_start: IntArray, - idx_end: IntArray, - is_leaf: BoolArray, - node_data: NodeDataArray, - rdist: RDist, - min_max_rdist: MinMaxRDist, - eps: float = 1e-8) -> float: + query_r: float, + start_nodes: IntArray, + # ----- + # pre-allocated data + sample_indices: IntArray, + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + consumed: BoolArray, + min_dists: FloatArray, # in_size, minimum distances + heap: ih.IndexHeap, # heapified IndexHeap + # ----- + # tree data + data: FloatArray, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_data: NodeDataArray, + rdist: RDist, + min_max_rdist: MinMaxRDist, + eps: float = 1e-8, +) -> float: """ Perform iterative farthest point sampling and querying. @@ -806,16 +836,18 @@ def ifp_sample_query_prealloc( if count >= sample_size: break else: - raise RuntimeError('Should have broken...') + raise RuntimeError("Should have broken...") return -top_dist @nb.njit() -def rejection_sample_precomputed_prealloc(query_indices: IntArray, - counts: IntArray, - sample_indices: IntArray, - consumed: BoolArray, - valid: Optional[BoolArray]=None) -> int: +def rejection_sample_precomputed_prealloc( + query_indices: IntArray, + counts: IntArray, + sample_indices: IntArray, + consumed: BoolArray, + valid: Optional[BoolArray] = None, +) -> int: """ Perform rejection sampling with precomputed sample indices. @@ -851,13 +883,15 @@ def rejection_sample_precomputed_prealloc(query_indices: IntArray, return sample_count -@nb.njit(inline='always') -def rejection_sample_precomputed(query_indices: IntArray, - counts: IntArray, - max_samples: int, - int_type=INT_TYPE, - bool_type=BOOL_TYPE, - valid: Optional[BoolArray]=None) -> RejectionSampleResult: +@nb.njit(inline="always") +def rejection_sample_precomputed( + query_indices: IntArray, + counts: IntArray, + max_samples: int, + int_type=INT_TYPE, + bool_type=BOOL_TYPE, + valid: Optional[BoolArray] = None, +) -> RejectionSampleResult: """ Perform rejection sampling with precomputed sample indices. @@ -870,32 +904,32 @@ def rejection_sample_precomputed(query_indices: IntArray, in_size = counts.size sample_indices = np.full((max_samples,), -1, dtype=int_type) consumed = np.zeros((in_size,), dtype=bool_type) - count = rejection_sample_precomputed_prealloc(query_indices, counts, - sample_indices, consumed, - valid) + count = rejection_sample_precomputed_prealloc( + query_indices, counts, sample_indices, consumed, valid + ) return RejectionSampleResult(sample_indices, count) @nb.njit() def rejection_sample_query_prealloc( - rejection_r: float, - query_r: float, - start_nodes: IntArray, - # --- preallocated arrays below - sample_indices: IntArray, - dists: FloatArray, - query_indices: IntArray, - counts: IntArray, - consumed: BoolArray, - # --- tree data arrays below - data: FloatArray, - idx_array: IntArray, - idx_start: IntArray, - idx_end: IntArray, - is_leaf: BoolArray, - node_data: NodeDataArray, - rdist: RDist, - min_max_rdist: MinMaxRDist, + rejection_r: float, + query_r: float, + start_nodes: IntArray, + # --- preallocated arrays below + sample_indices: IntArray, + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + consumed: BoolArray, + # --- tree data arrays below + data: FloatArray, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_data: NodeDataArray, + rdist: RDist, + min_max_rdist: MinMaxRDist, ) -> int: """ Perform simultaneous rejection sampling and querying. @@ -946,45 +980,49 @@ def rejection_sample_query_prealloc( @nb.njit() -def get_node_indices_prealloc(idx_array: IntArray, idx_start: IntArray, - idx_end: IntArray, is_leaf: BoolArray, - node_indices: IntArray) -> None: +def get_node_indices_prealloc( + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_indices: IntArray, +) -> None: n_nodes = is_leaf.size for i in range(n_nodes): # pylint: disable=not-an-iterable if is_leaf[i]: - node_indices[idx_array[idx_start[i]:idx_end[i]]] = i + node_indices[idx_array[idx_start[i] : idx_end[i]]] = i @nb.njit() -def get_node_indices(idx_array: IntArray, idx_start: IntArray, - idx_end: IntArray, is_leaf: BoolArray) -> IntArray: +def get_node_indices( + idx_array: IntArray, idx_start: IntArray, idx_end: IntArray, is_leaf: BoolArray +) -> IntArray: """Get the index of the leaf of each data point.""" node_indices = np.empty((idx_array.size,), dtype=idx_start.dtype) - get_node_indices_prealloc(idx_array, idx_start, idx_end, is_leaf, - node_indices) + get_node_indices_prealloc(idx_array, idx_start, idx_end, is_leaf, node_indices) return node_indices @nb.njit(parallel=PARALLEL) def _rejection_sample_query_single_bottom_up( - count: int, - max_count: int, - i_node: int, - x: FloatArray, - dists: FloatArray, - indices: IntArray, - consumed: BoolArray, - rejection_r: float, - query_r: float, - # ----- tree data - data: FloatArray, - idx_array: IntArray, - idx_start: IntArray, - idx_end: IntArray, - is_leaf: BoolArray, - node_data: NodeDataArray, - rdist: RDist, - min_max_rdist: MinMaxRDist, + count: int, + max_count: int, + i_node: int, + x: FloatArray, + dists: FloatArray, + indices: IntArray, + consumed: BoolArray, + rejection_r: float, + query_r: float, + # ----- tree data + data: FloatArray, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_data: NodeDataArray, + rdist: RDist, + min_max_rdist: MinMaxRDist, ): count = _query_radius_single_bottom_up( count, @@ -1016,22 +1054,22 @@ def _rejection_sample_query_single_bottom_up( @nb.njit(parallel=PARALLEL) def query_radius_bottom_up_prealloc( - X: FloatArray, - r: float, - start_nodes: IntArray, - # --- preallocated data below - dists: FloatArray, - indices: IntArray, - counts: IntArray, - # --- tree data below - data: FloatArray, - idx_array: IntArray, - idx_start: IntArray, - idx_end: IntArray, - is_leaf: BoolArray, - node_data: NodeDataArray, - rdist: RDist, - min_max_rdist: MinMaxRDist, + X: FloatArray, + r: float, + start_nodes: IntArray, + # --- preallocated data below + dists: FloatArray, + indices: IntArray, + counts: IntArray, + # --- tree data below + data: FloatArray, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_data: NodeDataArray, + rdist: RDist, + min_max_rdist: MinMaxRDist, ): """ Query a binary tree when the leaf index of each query point is known. @@ -1070,22 +1108,22 @@ def query_radius_bottom_up_prealloc( @nb.njit() def _query_radius_single_bottom_up( - count: int, - max_count: int, - i_node: int, - x: FloatArray, - dists: FloatArray, - indices: IntArray, - r: float, - # -------- tree data - data: FloatArray, - idx_array: IntArray, - idx_start: IntArray, - idx_end: IntArray, - is_leaf: BoolArray, - node_data: NodeDataArray, - rdist: RDist, - min_max_rdist: MinMaxRDist, + count: int, + max_count: int, + i_node: int, + x: FloatArray, + dists: FloatArray, + indices: IntArray, + r: float, + # -------- tree data + data: FloatArray, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_data: NodeDataArray, + rdist: RDist, + min_max_rdist: MinMaxRDist, ) -> int: count = _query_radius_single( count, @@ -1130,20 +1168,20 @@ def _query_radius_single_bottom_up( @nb.njit(parallel=PARALLEL) def query_radius_prealloc( - X: FloatArray, - r: float, - dists: FloatArray, - indices: IntArray, - counts: IntArray, - # ----- tree data below - data: FloatArray, - idx_array: IntArray, - idx_start: IntArray, - idx_end: IntArray, - is_leaf: BoolArray, - node_data: NodeDataArray, - rdist: RDist, - min_max_rdist: MinMaxRDist, + X: FloatArray, + r: float, + dists: FloatArray, + indices: IntArray, + counts: IntArray, + # ----- tree data below + data: FloatArray, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_data: NodeDataArray, + rdist: RDist, + min_max_rdist: MinMaxRDist, ) -> None: """ Perform ball search saving data into preallocated arrays. @@ -1185,34 +1223,34 @@ def query_radius_prealloc( @nb.njit() def _query_radius_single( - count: int, - max_count: int, - i_node: int, - x: FloatArray, - dists: FloatArray, - indices: IntArray, - r: float, - data: FloatArray, - idx_array: IntArray, - idx_start: IntArray, - idx_end: IntArray, - is_leaf: BoolArray, - node_data: NodeDataArray, - rdist: RDist, - min_max_rdist: MinMaxRDist, + count: int, + max_count: int, + i_node: int, + x: FloatArray, + dists: FloatArray, + indices: IntArray, + r: float, + data: FloatArray, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_data: NodeDataArray, + rdist: RDist, + min_max_rdist: MinMaxRDist, ) -> int: if count >= max_count: return count rdist_LB, rdist_UB = min_max_rdist(node_data[i_node], x) - #------------------------------------------------------------ + # ------------------------------------------------------------ # Case 1: all node points are outside distance r. # prune this branch. if rdist_LB > r: pass - #------------------------------------------------------------ + # ------------------------------------------------------------ # Case 2: all node points are within distance r # add all points to neighbors elif rdist_UB <= r: @@ -1224,7 +1262,7 @@ def _query_radius_single( if count >= max_count: break - #------------------------------------------------------------ + # ------------------------------------------------------------ # Case 3: this is a leaf node. Go through all points to # determine if they fall within radius elif is_leaf[i_node]: @@ -1238,7 +1276,7 @@ def _query_radius_single( if count >= max_count: break - #------------------------------------------------------------ + # ------------------------------------------------------------ # Case 4: Node is not a leaf. Recursively query subnodes else: count = _query_radius_single( @@ -1283,15 +1321,15 @@ def tree_spec(float_type=FLOAT_TYPE, int_type=INT_TYPE, bool_type=BOOL_TYPE): int_type_t = nb.from_dtype(int_type) bool_type_t = nb.from_dtype(bool_type) return [ - ('n_data', INT_TYPE_T), - ('n_features', INT_TYPE_T), - ('leaf_size', INT_TYPE_T), - ('n_nodes', INT_TYPE_T), - ('data', float_type_t[:, :]), - ('idx_array', int_type_t[::1]), - ('idx_start', int_type_t[::1]), - ('idx_end', int_type_t[::1]), - ('is_leaf', bool_type_t[::1]), + ("n_data", INT_TYPE_T), + ("n_features", INT_TYPE_T), + ("leaf_size", INT_TYPE_T), + ("n_nodes", INT_TYPE_T), + ("data", float_type_t[:, :]), + ("idx_array", int_type_t[::1]), + ("idx_start", int_type_t[::1]), + ("idx_end", int_type_t[::1]), + ("is_leaf", bool_type_t[::1]), ] @@ -1327,9 +1365,16 @@ class BinaryTree(object): See kd_tree.KDTree for implementation. """ - def __init__(self, data: FloatArray, leaf_size: int, idx_array: IntArray, - idx_start: IntArray, idx_end: IntArray, is_leaf: BoolArray, - node_data: NodeDataArray): + def __init__( + self, + data: FloatArray, + leaf_size: int, + idx_array: IntArray, + idx_start: IntArray, + idx_end: IntArray, + is_leaf: BoolArray, + node_data: NodeDataArray, + ): fill_tree_data(data, leaf_size, idx_array, idx_start, idx_end, is_leaf) self.data = data self.idx_array = idx_array @@ -1384,7 +1429,7 @@ def rdist(self) -> RDist: jitted functions does not result in a massive slow down like bound member functions does. """ - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") @property def min_max_rdist(self) -> MinMaxRDist: @@ -1395,7 +1440,7 @@ def min_max_rdist(self) -> MinMaxRDist: jitted functions does not result in a massive slow down like bound member functions does. """ - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") # def rdist(self, x, y): # raise NotImplementedError('Abstract method') @@ -1403,8 +1448,14 @@ def min_max_rdist(self) -> MinMaxRDist: # def min_max_rdist(self, lower_bounds, upper_bounds, x, n_features): # raise NotImplementedError('Abstract method') - def query_radius_prealloc(self, X: np.ndarray, r: float, dists: np.ndarray, - indices: np.ndarray, counts: np.ndarray) -> None: + def query_radius_prealloc( + self, + X: np.ndarray, + r: float, + dists: np.ndarray, + indices: np.ndarray, + counts: np.ndarray, + ) -> None: return query_radius_prealloc( X, r, @@ -1421,8 +1472,7 @@ def query_radius_prealloc(self, X: np.ndarray, r: float, dists: np.ndarray, min_max_rdist=self.min_max_rdist, ) - def query_radius(self, X: np.ndarray, r: float, - max_count: int) -> QueryResult: + def query_radius(self, X: np.ndarray, r: float, max_count: int) -> QueryResult: """ Perform ball search on query points X. @@ -1439,7 +1489,7 @@ def query_radius(self, X: np.ndarray, r: float, QueryResult: (dists, indices, counts) """ n_queries, n_features = X.shape - assert (n_features == self.n_features) + assert n_features == self.n_features shape = (n_queries, max_count) dists = np.full(shape, np.inf, dtype=self.float_type) indices = np.full(shape, self.n_data, dtype=self.int_type) @@ -1447,10 +1497,15 @@ def query_radius(self, X: np.ndarray, r: float, self.query_radius_prealloc(X, r, dists, indices, counts) return QueryResult(dists, indices, counts) - def query_radius_bottom_up_prealloc(self, X: FloatArray, r: float, - start_nodes: IntArray, - dists: FloatArray, indices: IntArray, - counts: IntArray) -> None: + def query_radius_bottom_up_prealloc( + self, + X: FloatArray, + r: float, + start_nodes: IntArray, + dists: FloatArray, + indices: IntArray, + counts: IntArray, + ) -> None: query_radius_bottom_up_prealloc( X, r, @@ -1468,24 +1523,27 @@ def query_radius_bottom_up_prealloc(self, X: FloatArray, r: float, min_max_rdist=self.min_max_rdist, ) - def query_radius_bottom_up(self, X: FloatArray, r: float, - start_nodes: IntArray, max_count: int): + def query_radius_bottom_up( + self, X: FloatArray, r: float, start_nodes: IntArray, max_count: int + ): n_queries = X.shape[0] dists = np.full((n_queries, max_count), np.inf, dtype=self.float_type) - indices = np.full((n_queries, max_count), - self.n_data, - dtype=self.int_type) + indices = np.full((n_queries, max_count), self.n_data, dtype=self.int_type) counts = np.zeros((n_queries,), dtype=self.int_type) - self.query_radius_bottom_up_prealloc(X, r, start_nodes, dists, indices, - counts) + self.query_radius_bottom_up_prealloc(X, r, start_nodes, dists, indices, counts) return QueryResult(dists, indices, counts) - def rejection_sample_query_prealloc(self, rejection_r: float, - query_r: float, start_nodes: IntArray, - sample_indices: IntArray, - dists: FloatArray, - query_indices: IntArray, - counts: IntArray, consumed: BoolArray): + def rejection_sample_query_prealloc( + self, + rejection_r: float, + query_r: float, + start_nodes: IntArray, + sample_indices: IntArray, + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + consumed: BoolArray, + ): return rejection_sample_query_prealloc( rejection_r, query_r, @@ -1505,38 +1563,49 @@ def rejection_sample_query_prealloc(self, rejection_r: float, min_max_rdist=self.min_max_rdist, ) - def rejection_sample_query(self, rejection_r, query_r, - start_nodes: IntArray, max_samples: int, - max_counts: int) -> RejectionSampleQueryResult: - sample_indices = np.full((max_samples,), - self.n_data, - dtype=self.int_type) + def rejection_sample_query( + self, + rejection_r, + query_r, + start_nodes: IntArray, + max_samples: int, + max_counts: int, + ) -> RejectionSampleQueryResult: + sample_indices = np.full((max_samples,), self.n_data, dtype=self.int_type) shape = (max_samples, max_counts) dists = np.full(shape, np.inf, dtype=self.float_type) query_indices = np.full(shape, self.n_data, dtype=self.int_type) counts = np.full((max_samples,), -1, dtype=self.int_type) consumed = np.zeros((self.n_data,), dtype=self.bool_type) sample_count = self.rejection_sample_query_prealloc( - rejection_r, query_r, start_nodes, sample_indices, dists, - query_indices, counts, consumed) + rejection_r, + query_r, + start_nodes, + sample_indices, + dists, + query_indices, + counts, + consumed, + ) return RejectionSampleQueryResult( RejectionSampleResult(sample_indices, sample_count), - QueryResult(dists, query_indices, counts)) + QueryResult(dists, query_indices, counts), + ) def ifp_sample_query_prealloc( - self, - query_r: float, - start_nodes: IntArray, - # ----- - # pre-allocated data - sample_indices: IntArray, - dists: FloatArray, - query_indices: IntArray, - counts: IntArray, - consumed: BoolArray, - min_dists: FloatArray, # in_size, minimum distances - heap: ih.IndexHeap, # assumed to be heapified + self, + query_r: float, + start_nodes: IntArray, + # ----- + # pre-allocated data + sample_indices: IntArray, + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + consumed: BoolArray, + min_dists: FloatArray, # in_size, minimum distances + heap: ih.IndexHeap, # assumed to be heapified ) -> float: return ifp_sample_query_prealloc( query_r, @@ -1558,12 +1627,10 @@ def ifp_sample_query_prealloc( min_max_rdist=self.min_max_rdist, ) - def ifp_sample_query(self, query_r: float, start_nodes: IntArray, - sample_size: int, - max_counts: int) -> IFPSampleQueryResult: - sample_indices = np.full((sample_size,), - self.n_data, - dtype=self.int_type) + def ifp_sample_query( + self, query_r: float, start_nodes: IntArray, sample_size: int, max_counts: int + ) -> IFPSampleQueryResult: + sample_indices = np.full((sample_size,), self.n_data, dtype=self.int_type) shape = (sample_size, max_counts) dists = np.full(shape, np.inf, dtype=self.float_type) query_indices = np.full(shape, self.n_data, dtype=self.int_type) @@ -1578,30 +1645,38 @@ def ifp_sample_query(self, query_r: float, start_nodes: IntArray, sample_size * max_counts + self.n_data, ) min_dists *= -1 - min_dist = self.ifp_sample_query_prealloc(query_r, start_nodes, - sample_indices, dists, - query_indices, counts, - consumed, min_dists, heap) + min_dist = self.ifp_sample_query_prealloc( + query_r, + start_nodes, + sample_indices, + dists, + query_indices, + counts, + consumed, + min_dists, + heap, + ) return IFPSampleQueryResult( IFPSampleResult(sample_indices, min_dists, min_dist), - QueryResult(dists, query_indices, counts)) + QueryResult(dists, query_indices, counts), + ) def rejection_ifp_sample_query_prealloc( - self, - rejection_r: float, - query_r: float, - start_nodes: IntArray, - # ----- - # pre-allocated data - sample_indices: IntArray, - dists: FloatArray, - query_indices: IntArray, - counts: IntArray, - consumed: BoolArray, - min_dists: FloatArray, - heap_priorities: FloatArray, - heap_indices: IntArray, + self, + rejection_r: float, + query_r: float, + start_nodes: IntArray, + # ----- + # pre-allocated data + sample_indices: IntArray, + dists: FloatArray, + query_indices: IntArray, + counts: IntArray, + consumed: BoolArray, + min_dists: FloatArray, + heap_priorities: FloatArray, + heap_indices: IntArray, ) -> float: """ Simultaneous sampling and querying with preallocated data. @@ -1631,9 +1706,14 @@ def rejection_ifp_sample_query_prealloc( min_max_rdist=self.min_max_rdist, ) - def rejection_ifp_sample_query(self, rejection_r: float, query_r: float, - start_nodes: IntArray, sample_size: int, - max_counts: int) -> IFPSampleQueryResult: + def rejection_ifp_sample_query( + self, + rejection_r: float, + query_r: float, + start_nodes: IntArray, + sample_size: int, + max_counts: int, + ) -> IFPSampleQueryResult: """ Perform simultaneous rejection_ifp sampling and querying. @@ -1655,9 +1735,7 @@ def rejection_ifp_sample_query(self, rejection_r: float, query_r: float, - indices - counts """ - sample_indices = np.full((sample_size,), - self.n_data, - dtype=self.int_type) + sample_indices = np.full((sample_size,), self.n_data, dtype=self.int_type) shape = (sample_size, max_counts) dists = np.full(shape, np.inf, dtype=self.float_type) query_indices = np.full(shape, self.n_data, dtype=self.int_type) @@ -1670,24 +1748,37 @@ def rejection_ifp_sample_query(self, rejection_r: float, query_r: float, heap_indices = np.empty((max_heap_length,), dtype=self.int_type) min_dist = self.rejection_ifp_sample_query_prealloc( - rejection_r, query_r, start_nodes, sample_indices, dists, - query_indices, counts, consumed, min_dists, heap_priorities, - heap_indices) + rejection_r, + query_r, + start_nodes, + sample_indices, + dists, + query_indices, + counts, + consumed, + min_dists, + heap_priorities, + heap_indices, + ) return IFPSampleQueryResult( IFPSampleResult(sample_indices, min_dists, min_dist), - QueryResult(dists, query_indices, counts)) + QueryResult(dists, query_indices, counts), + ) def get_node_indices_prealloc(self, node_indices: IntArray): - get_node_indices_prealloc(self.idx_array, self.idx_start, self.idx_end, - self.is_leaf, node_indices) + get_node_indices_prealloc( + self.idx_array, self.idx_start, self.idx_end, self.is_leaf, node_indices + ) def get_node_indices(self): """Get the node indices of the leaf containing each point in data.""" - return get_node_indices(idx_array=self.idx_array, - idx_start=self.idx_start, - idx_end=self.idx_end, - is_leaf=self.is_leaf) + return get_node_indices( + idx_array=self.idx_array, + idx_start=self.idx_start, + idx_end=self.idx_end, + is_leaf=self.is_leaf, + ) def permute(self, perm): """ @@ -1697,15 +1788,15 @@ def permute(self, perm): samplings to give indices into a permuted data but don't want to rebuild the tree. """ - self.data, self.idx_array = permute_tree(self.data, self.idx_array, - perm) - - -def binary_tree(data: FloatArray, - leaf_size: int = 40, - int_type=INT_TYPE, - bool_type=BOOL_TYPE): - return BinaryTree(data, - leaf_size, - *create_tree_data(data, leaf_size, int_type, bool_type), - node_data=None) + self.data, self.idx_array = permute_tree(self.data, self.idx_array, perm) + + +def binary_tree( + data: FloatArray, leaf_size: int = 40, int_type=INT_TYPE, bool_type=BOOL_TYPE +): + return BinaryTree( + data, + leaf_size, + *create_tree_data(data, leaf_size, int_type, bool_type), + node_data=None + ) diff --git a/numba_neighbors/binary_tree_test.py b/numba_neighbors/binary_tree_test.py index b575e53..97e2a89 100644 --- a/numba_neighbors/binary_tree_test.py +++ b/numba_neighbors/binary_tree_test.py @@ -1,12 +1,12 @@ -from numba_neighbors.binary_tree import simultaneous_sort +import unittest import numpy as np -import unittest + from numba_neighbors import binary_tree as bt +from numba_neighbors.binary_tree import simultaneous_sort class BinaryTreeTest(unittest.TestCase): - def test_build(self): np.random.seed(123) N = 256 @@ -39,8 +39,7 @@ def test_permute_tree(self): perm = np.arange(N) np.random.shuffle(perm) - permuted_data, permuted_idx_array = bt.permute_tree( - data, idx_array, perm) + permuted_data, permuted_idx_array = bt.permute_tree(data, idx_array, perm) actual = permuted_data[permuted_idx_array] expected = data[idx_array] @@ -54,19 +53,21 @@ def rejection_sample_valid(self): r0 = 0.2 r1 = 0.1 - dists, indices, counts = tree.query_radius(data, r=r0**2, max_count=N) + dists, indices, counts = tree.query_radius(data, r=r0 ** 2, max_count=N) - valid = dists < r1**2 + valid = dists < r1 ** 2 actual_sample_indices, actual_count = bt.rejection_sample_precomputed( - indices, counts, N, valid=valid) + indices, counts, N, valid=valid + ) - dists, indices, counts = tree.query_radius(data, r=r1**2, max_count=N) + dists, indices, counts = tree.query_radius(data, r=r1 ** 2, max_count=N) expected_sample_indices, expected_count = bt.rejection_sample_precomputed( - indices, counts, N) + indices, counts, N + ) np.testing.assert_equal(actual_sample_indices, expected_sample_indices) np.testing.assert_equal(actual_count, expected_count) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/numba_neighbors/index_heap2.py b/numba_neighbors/index_heap2.py index e0c7eb9..24a81ed 100644 --- a/numba_neighbors/index_heap2.py +++ b/numba_neighbors/index_heap2.py @@ -4,20 +4,22 @@ Significantly slower than index_heap implementation, but I'll leave it here so when I think maybe I can make it faster by doing this I'll know I'm wrong. """ -from typing import Tuple, NamedTuple +from typing import Tuple + import numpy as np -from numba import jitclass, njit -from numba import types +from numba import jitclass, njit, types -@jitclass([('priorities', types.float32[:]), ('indices', types.int64[:]), - ('max_length', types.int64), ('length', types.int64)]) +@jitclass( + [ + ("priorities", types.float32[:]), + ("indices", types.int64[:]), + ("max_length", types.int64), + ("length", types.int64), + ] +) class IndexHeap(object): - - def __init__(self, - priorities: np.ndarray, - indices: np.ndarray, - length: int = 0): + def __init__(self, priorities: np.ndarray, indices: np.ndarray, length: int = 0): self.priorities = priorities self.indices = indices self.length = length @@ -34,14 +36,15 @@ def pop(self): return item def push(self, priority, index): - self.length = _heappush(priority, index, self.priorities, self.indices, - self.length) + self.length = _heappush( + priority, index, self.priorities, self.indices, self.length + ) @njit() def padded_index_heap(priorities, indices, max_length): length = priorities.size - assert (indices.size == length) + assert indices.size == length actual_priorities = np.empty((max_length,), dtype=priorities.dtype) actual_priorities[:length] = priorities actual_indices = np.empty((max_length,), dtype=indices.dtype) @@ -49,28 +52,28 @@ def padded_index_heap(priorities, indices, max_length): return IndexHeap(actual_priorities, actual_indices, length) -@njit(inline='always') +@njit(inline="always") def _getitem(index, priorities, indices): # assert (0 <= index) # assert (index < self.length) return priorities[index], indices[index] -@njit(inline='always') +@njit(inline="always") def _setitem(index, item, priorities, indices): pr, val = item priorities[index] = pr indices[index] = val -@njit(inline='always') +@njit(inline="always") def _replaceitem(dst_index, src_index, priorities, indices): """Equivalent to self._setitem(dst_index, self._getitem(src_index)).""" priorities[dst_index] = priorities[src_index] indices[dst_index] = indices[src_index] -@njit(inline='always') +@njit(inline="always") def _arr_append(priority, value, priorities, indices, length) -> int: # assert (self.length < self.max_length) priorities[length] = priority @@ -78,7 +81,7 @@ def _arr_append(priority, value, priorities, indices, length) -> int: return length + 1 -@njit(inline='always') +@njit(inline="always") def _arr_pop(priorities, indices, length) -> Tuple[Tuple, int]: # assert (self.length > 0) length = length - 1 @@ -87,7 +90,7 @@ def _arr_pop(priorities, indices, length) -> Tuple[Tuple, int]: return (priority, value), length -@njit(inline='always') +@njit(inline="always") def _siftup(pos, priorities, indices, length): endpos = length startpos = pos @@ -97,8 +100,7 @@ def _siftup(pos, priorities, indices, length): while childpos < endpos: # Set childpos to index of smaller child. rightpos = childpos + 1 - if (rightpos < endpos and - not priorities[childpos] < priorities[rightpos]): + if rightpos < endpos and not priorities[childpos] < priorities[rightpos]: childpos = rightpos # Move the smaller child up. _replaceitem(pos, childpos, priorities, indices) @@ -110,7 +112,7 @@ def _siftup(pos, priorities, indices, length): _siftdown(startpos, pos, priorities, indices) -@njit(inline='always') +@njit(inline="always") def _siftdown(startpos, pos, priorities, indices): newpr, newind = _getitem(pos, priorities, indices) # Follow the path to the root, moving parents down until finding a place @@ -128,7 +130,7 @@ def _siftdown(startpos, pos, priorities, indices): indices[pos] = newind -@njit(inline='always') +@njit(inline="always") def _heapify(priorities, indices, length): """Transform list into a heap, in-place, in O(len(x)) time.""" # Transform bottom-up. The largest index there's any point to looking at @@ -140,7 +142,7 @@ def _heapify(priorities, indices, length): _siftup(i, priorities, indices, length) -@njit(inline='always') +@njit(inline="always") def _heappush(priority, value, priorities, indices, length): """Push item onto heap, maintaining the heap invariant.""" length = _arr_append(priority, value, priorities, indices, length) @@ -148,10 +150,12 @@ def _heappush(priority, value, priorities, indices, length): return length -@njit(inline='always') +@njit(inline="always") def _heappop(priorities, indices, length) -> Tuple: """Pop the smallest item off the heap, maintaining the heap invariant.""" - lastelt, length = _arr_pop(priorities, indices, length) # pylint: disable=unbalanced-tuple-unpacking + lastelt, length = _arr_pop( + priorities, indices, length + ) # pylint: disable=unbalanced-tuple-unpacking if length > 0: returnitem = _getitem(0, priorities, indices) _setitem(0, lastelt, priorities, indices) diff --git a/numba_neighbors/index_heap_test.py b/numba_neighbors/index_heap_test.py index fb8b6e9..442fa8a 100644 --- a/numba_neighbors/index_heap_test.py +++ b/numba_neighbors/index_heap_test.py @@ -1,26 +1,23 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - +import heapq import os -os.environ['NUMBA_DISABLE_JIT'] = '1' - import unittest -import heapq + +import numpy as np + from numba_neighbors import index_heap as ih from numba_neighbors import index_heap2 as ih2 -import numpy as np + +os.environ["NUMBA_DISABLE_JIT"] = "1" + max_length = 100 length = 50 class IndexHeapTest(unittest.TestCase): - def setUp(self): np.random.seed(123) - self.priorities = np.random.uniform(size=(max_length,)).astype( - np.float32) + self.priorities = np.random.uniform(size=(max_length,)).astype(np.float32) self.indices = np.arange(max_length) self.heap = list(zip(self.priorities[:length], self.indices[:length])) heapq.heapify(self.heap) @@ -45,18 +42,15 @@ def test_heappop(self): self.assertEqual(actualval, expectedval) def test_heappush(self): - heapq.heappush(self.heap, - (self.priorities[length], self.indices[length])) + heapq.heappush(self.heap, (self.priorities[length], self.indices[length])) self.iheap.push(self.priorities[length], self.indices[length]) self._assert_heaps_equal() class IndexHeap2Test(IndexHeapTest): - def setUp(self): np.random.seed(123) - self.priorities = np.random.uniform(size=(max_length,)).astype( - np.float32) + self.priorities = np.random.uniform(size=(max_length,)).astype(np.float32) self.indices = np.arange(max_length) self.heap = list(zip(self.priorities[:length], self.indices[:length])) heapq.heapify(self.heap) @@ -65,5 +59,5 @@ def setUp(self): self.iheap.heapify() -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/numba_neighbors/kd_tree.py b/numba_neighbors/kd_tree.py index 5a1fb99..31b1030 100644 --- a/numba_neighbors/kd_tree.py +++ b/numba_neighbors/kd_tree.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import numba as nb import numpy as np from numba.experimental import jitclass diff --git a/numba_neighbors/kd_tree_test.py b/numba_neighbors/kd_tree_test.py index c94f061..26e8fcb 100644 --- a/numba_neighbors/kd_tree_test.py +++ b/numba_neighbors/kd_tree_test.py @@ -1,19 +1,16 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# import os -# os.environ['NUMBA_DISABLE_JIT'] = '1' +import unittest import numpy as np -import unittest -from numba_neighbors import kd_tree as kd -from numba_neighbors import binary_tree as bt from sklearn.neighbors import KDTree as sk_KDTree +from numba_neighbors import binary_tree as bt +from numba_neighbors import kd_tree as kd -class KDTreeTest(unittest.TestCase): +# import os +# os.environ['NUMBA_DISABLE_JIT'] = '1' + +class KDTreeTest(unittest.TestCase): def tree(self, data, leaf_size): return kd.KDTree(data, leaf_size=leaf_size) @@ -60,7 +57,8 @@ def test_query_consistent(self): sk_tree = sk_KDTree(data, leaf_size=leaf_size) expected_indices, expected_dists = sk_tree.query_radius( - X, r, return_distance=True, sort_results=True) + X, r, return_distance=True, sort_results=True + ) expected_counts = [d.size for d in expected_dists] expected_dists = np.concatenate(expected_dists, axis=0) expected_indices = np.concatenate(expected_indices, axis=0) @@ -74,8 +72,9 @@ def test_query_consistent(self): numba_tree.query_radius_prealloc(X, r2, dists, indices, counts) bt.simultaneous_sort_partial(dists, indices, counts) - mask = np.tile(np.expand_dims(np.arange(max_neighbors), 0), - (n, 1)) < np.expand_dims(counts, axis=1) + mask = np.tile( + np.expand_dims(np.arange(max_neighbors), 0), (n, 1) + ) < np.expand_dims(counts, axis=1) flat_dists = dists[mask] flat_indices = indices[mask] @@ -99,7 +98,8 @@ def test_query_bottom_up_consistent(self): sk_tree = sk_KDTree(data, leaf_size=leaf_size) expected_indices, expected_dists = sk_tree.query_radius( - X, r, return_distance=True, sort_results=True) + X, r, return_distance=True, sort_results=True + ) expected_counts = [d.size for d in expected_dists] expected_dists = np.concatenate(expected_dists, axis=0) expected_indices = np.concatenate(expected_indices, axis=0) @@ -113,12 +113,14 @@ def test_query_bottom_up_consistent(self): start_nodes = numba_tree.get_node_indices()[X_indices] - numba_tree.query_radius_bottom_up_prealloc(X, r2, start_nodes, dists, - indices, counts) + numba_tree.query_radius_bottom_up_prealloc( + X, r2, start_nodes, dists, indices, counts + ) bt.simultaneous_sort_partial(dists, indices, counts) - mask = np.tile(np.expand_dims(np.arange(max_neighbors), 0), - (n, 1)) < np.expand_dims(counts, axis=1) + mask = np.tile( + np.expand_dims(np.arange(max_neighbors), 0), (n, 1) + ) < np.expand_dims(counts, axis=1) flat_dists = dists[mask] flat_indices = indices[mask] @@ -142,11 +144,13 @@ def test_ifp_sample_consistent(self): numba_tree = self.tree(data, leaf_size=leaf_size) start_indices = numba_tree.get_node_indices() - sr, qr = numba_tree.ifp_sample_query(r2, start_indices, sample_size, - max_neighbors) + sr, qr = numba_tree.ifp_sample_query( + r2, start_indices, sample_size, max_neighbors + ) sr2, qr2 = numba_tree.rejection_ifp_sample_query( - r2, r2, start_indices, sample_size, max_neighbors) + r2, r2, start_indices, sample_size, max_neighbors + ) np.testing.assert_equal(sr.indices, sr2.indices) np.testing.assert_allclose(sr.min_dists, sr2.min_dists) @@ -157,11 +161,11 @@ def test_ifp_sample_consistent(self): np.testing.assert_equal(qr.counts, qr2.counts) dists, indices, counts = numba_tree.query_radius_bottom_up( - data, r2, start_indices, max_neighbors) + data, r2, start_indices, max_neighbors + ) sr3 = bt.ifp_sample_precomputed(dists, indices, counts, sample_size) - sr4 = bt.rejection_ifp_sample_precomputed(dists, indices, counts, - sample_size) + sr4 = bt.rejection_ifp_sample_precomputed(dists, indices, counts, sample_size) # # compare sr3 / sr4 np.testing.assert_allclose(sr3.min_dists, sr4.min_dists) @@ -185,7 +189,6 @@ def test_ifp_sample_consistent(self): class KDTree3Test(KDTreeTest): - @property def num_dims(self): return 3 @@ -194,5 +197,5 @@ def tree(self, data, leaf_size): return kd.KDTree3(data, leaf_size=leaf_size) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/setup.py b/setup.py index c34ee24..36e8d1d 100644 --- a/setup.py +++ b/setup.py @@ -1,21 +1,16 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from setuptools import find_packages, setup -from setuptools import setup -from setuptools import find_packages - -with open('requirements.txt') as fp: - install_requires = fp.read().split('\n') +with open("requirements.txt") as fp: + install_requires = fp.read().split("\n") setup( - name='numba-neighbors', - version='0.0.1', - description='Numba implementation of scikit-learn neighbors', - url='https://github.com/jackd/numba-neighbors.git', - author='Dominic Jack', - author_email='thedomjack@gmail.com', - license='MIT', + name="numba-neighbors", + version="0.0.1", + description="Numba implementation of scikit-learn neighbors", + url="https://github.com/jackd/numba-neighbors.git", + author="Dominic Jack", + author_email="thedomjack@gmail.com", + license="MIT", packages=find_packages(), requirements=install_requires, zip_safe=True,