We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
========================================================================================= FAILURES ========================================================================================= _________________________________________________________________________ test_bitpacked_nn_descent_query_accuracy _________________________________________________________________________ nn_data = array([[0.52111531, 0.77647716, 0.93834037, 0.66185582, 0.19981062], [0.43632302, 0.16532886, 0.67949223, 0.587... 0. , 0. , 0. , 0. ], [0. , 0. , 0. , 0. , 0. ]]) def test_bitpacked_nn_descent_query_accuracy(nn_data): bitpacked_data = (nn_data * 256).astype(np.uint8) unpacked_data = np.zeros( (bitpacked_data.shape[0], bitpacked_data.shape[1] * 8), dtype=np.float32 ) for i in range(unpacked_data.shape[0]): for j in range(unpacked_data.shape[1]): unpacked_data[i, j] = (bitpacked_data[i, j // 8] & (1 << (j % 8))) > 0 > nnd = NNDescent( bitpacked_data[200:], "bit_jaccard", n_neighbors=50, random_state=None ) pynndescent/tests/test_pynndescent_.py:207: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <pynndescent.pynndescent_.NNDescent object at 0x24f3964bdc10> data = array([[ 91, 127, 169, 1, 27], [ 51, 148, 22, 235, 2], [170, 219, 25, 239, 176], ..., [ 44, 244, 81, 85, 237], [ 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0]], dtype=uint8) metric = 'bit_jaccard', metric_kwds = {}, n_neighbors = 50, n_trees = 10, leaf_size = None, pruning_degree_multiplier = 1.5, diversify_prob = 1.0, n_search_trees = 1, tree_init = True init_graph = None, init_dist = None, random_state = None, low_memory = True, max_candidates = None, max_rptree_depth = 200, n_iters = 10, delta = 0.001, n_jobs = None, compressed = False parallel_batch_queries = False, verbose = False def __init__( self, data, metric="euclidean", metric_kwds=None, n_neighbors=30, n_trees=None, leaf_size=None, 
pruning_degree_multiplier=1.5, diversify_prob=1.0, n_search_trees=1, tree_init=True, init_graph=None, init_dist=None, random_state=None, low_memory=True, max_candidates=None, max_rptree_depth=200, n_iters=None, delta=0.001, n_jobs=None, compressed=False, parallel_batch_queries=False, verbose=False, ): if n_trees is None: n_trees = 5 + int(round((data.shape[0]) ** 0.25)) n_trees = min(32, n_trees) # Only so many trees are useful if n_iters is None: n_iters = max(5, int(round(np.log2(data.shape[0])))) self.n_trees = n_trees self.n_trees_after_update = max(1, int(np.round(self.n_trees / 3))) self.n_neighbors = n_neighbors self.metric = metric self.metric_kwds = metric_kwds self.leaf_size = leaf_size self.prune_degree_multiplier = pruning_degree_multiplier self.diversify_prob = diversify_prob self.n_search_trees = n_search_trees self.max_rptree_depth = max_rptree_depth self.max_candidates = max_candidates self.low_memory = low_memory self.n_iters = n_iters self.delta = delta self.dim = data.shape[1] self.n_jobs = n_jobs self.compressed = compressed self.parallel_batch_queries = parallel_batch_queries self.verbose = verbose if getattr(data, "dtype", None) == np.float32 and ( issparse(data) or is_c_contiguous(data) ): copy_on_normalize = True else: copy_on_normalize = False if metric in ("bit_hamming", "bit_jaccard"): data = check_array(data, dtype=np.uint8, order="C") self._input_dtype = np.uint8 else: data = check_array(data, dtype=np.float32, accept_sparse="csr", order="C") self._input_dtype = np.float32 self._raw_data = data if not tree_init or n_trees == 0 or init_graph is not None: self.tree_init = False else: self.tree_init = True metric_kwds = metric_kwds or {} self._dist_args = tuple(metric_kwds.values()) self.random_state = random_state current_random_state = check_random_state(self.random_state) self._distance_correction = None if callable(metric): _distance_func = metric elif metric in pynnd_dist.named_distances: if metric in 
pynnd_dist.fast_distance_alternatives: _distance_func = pynnd_dist.fast_distance_alternatives[metric]["dist"] self._distance_correction = pynnd_dist.fast_distance_alternatives[ metric ]["correction"] else: _distance_func = pynnd_dist.named_distances[metric] else: raise ValueError("Metric is neither callable, " + "nor a recognised string") # Create a partial function for distances with arguments if len(self._dist_args) > 0: dist_args = self._dist_args @numba.njit() def _partial_dist_func(x, y): return _distance_func(x, y, *dist_args) self._distance_func = _partial_dist_func else: self._distance_func = _distance_func if metric in ( "cosine", "dot", "correlation", "dice", "jaccard", "hellinger", "hamming", "bit_hamming", "bit_jaccard", ): self._angular_trees = True if metric in ("bit_hamming", "bit_jaccard"): self._bit_trees = True else: self._bit_trees = False else: self._angular_trees = False self._bit_trees = False if metric == "dot": data = normalize(data, norm="l2", copy=copy_on_normalize) self._raw_data = data self.rng_state = current_random_state.randint(INT32_MIN, INT32_MAX, 3).astype( np.int64 ) self.search_rng_state = current_random_state.randint( INT32_MIN, INT32_MAX, 3 ).astype(np.int64) # Warm up the rng state for i in range(10): _ = tau_rand_int(self.search_rng_state) if self.tree_init: if verbose: print(ts(), "Building RP forest with", str(n_trees), "trees") self._rp_forest = make_forest( data, n_neighbors, n_trees, leaf_size, self.rng_state, current_random_state, self.n_jobs, self._angular_trees, self._bit_trees, max_depth=self.max_rptree_depth, ) leaf_array = rptree_leaf_array(self._rp_forest) else: self._rp_forest = None leaf_array = np.array([[-1]]) if self.max_candidates is None: effective_max_candidates = min(60, self.n_neighbors) else: effective_max_candidates = self.max_candidates # Set threading constraints self._original_num_threads = numba.get_num_threads() if self.n_jobs != -1 and self.n_jobs is not None: numba.set_num_threads(self.n_jobs) 
if isspmatrix_csr(self._raw_data): self._is_sparse = True if not self._raw_data.has_sorted_indices: self._raw_data.sort_indices() if metric in sparse.sparse_named_distances: if metric in sparse.sparse_fast_distance_alternatives: _distance_func = sparse.sparse_fast_distance_alternatives[metric][ "dist" ] self._distance_correction = ( sparse.sparse_fast_distance_alternatives[metric]["correction"] ) else: _distance_func = sparse.sparse_named_distances[metric] elif callable(metric): _distance_func = metric else: raise ValueError( "Metric {} not supported for sparse data".format(metric) ) if metric in sparse.sparse_need_n_features: metric_kwds["n_features"] = self._raw_data.shape[1] self._dist_args = tuple(metric_kwds.values()) # Create a partial function for distances with arguments if len(self._dist_args) > 0: dist_args = self._dist_args @numba.njit() def _partial_dist_func(ind1, data1, ind2, data2): return _distance_func(ind1, data1, ind2, data2, *dist_args) self._distance_func = _partial_dist_func else: self._distance_func = _distance_func if init_graph is None: _init_graph = EMPTY_GRAPH else: if init_graph.shape[0] != self._raw_data.shape[0]: raise ValueError("Init graph size does not match dataset size!") _init_graph = make_heap(init_graph.shape[0], self.n_neighbors) _init_graph = sparse_initalize_heap_from_graph_indices( _init_graph, init_graph, self._raw_data.indptr, self._raw_data.indices, self._raw_data.data, self._distance_func, ) if verbose: print(ts(), "metric NN descent for", str(n_iters), "iterations") self._neighbor_graph = sparse_nnd.nn_descent( self._raw_data.indices, self._raw_data.indptr, self._raw_data.data, self.n_neighbors, self.rng_state, max_candidates=effective_max_candidates, dist=self._distance_func, n_iters=self.n_iters, delta=self.delta, rp_tree_init=True, leaf_array=leaf_array, init_graph=_init_graph, low_memory=self.low_memory, verbose=verbose, ) else: self._is_sparse = False if init_graph is None: _init_graph = EMPTY_GRAPH else: if 
init_graph.shape[0] != self._raw_data.shape[0]: raise ValueError("Init graph size does not match dataset size!") _init_graph = make_heap(init_graph.shape[0], self.n_neighbors) if init_dist is None: _init_graph = initalize_heap_from_graph_indices( _init_graph, init_graph, data, self._distance_func ) elif init_graph.shape != init_dist.shape: raise ValueError( "The shapes of init graph and init distances do not match!" ) else: _init_graph = initalize_heap_from_graph_indices_and_distances( _init_graph, init_graph, init_dist ) if verbose: print(ts(), "NN descent for", str(n_iters), "iterations") > self._neighbor_graph = nn_descent( self._raw_data, self.n_neighbors, self.rng_state, effective_max_candidates, self._distance_func, self.n_iters, self.delta, low_memory=self.low_memory, rp_tree_init=True, init_graph=_init_graph, leaf_array=leaf_array, verbose=verbose, ) E ZeroDivisionError: division by zero pynndescent/pynndescent_.py:946: ZeroDivisionError ===================================================================================== warnings summary ===================================================================================== pynndescent/tests/test_distances.py::test_bit_jaccard pynndescent/tests/test_pynndescent_.py::test_bitpacked_nn_descent_neighbor_accuracy /usr/local/lib/python3.9/site-packages/sklearn/metrics/pairwise.py:2317: DataConversionWarning: Data was converted to boolean for metric jaccard warnings.warn(msg, DataConversionWarning) pynndescent/tests/test_pynndescent_.py::test_bitpacked_nn_descent_neighbor_accuracy /usr/ports/math/py-pynndescent/work-py39/pynndescent-0.5.12/pynndescent/pynndescent_.py:962: UserWarning: Failed to correctly find n_neighbors for some samples. Results may be less than ideal. Try re-running with different parameters. 
warn( -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ================================================================================= short test summary info ================================================================================== SKIPPED [1] pynndescent/tests/test_distances.py:245: incorrect function in scipy<1.8 ============================================================= 1 failed, 145 passed, 1 skipped, 3 warnings in 575.60s (0:09:35) ============================================================= *** Error code 1
Environment: pynndescent 0.5.12, Python 3.9, FreeBSD 14.0
The text was updated successfully, but these errors were encountered:
No branches or pull requests
pynndescent version: 0.5.12
Python version: 3.9
Operating system: FreeBSD 14.0
The text was updated successfully, but these errors were encountered: