Skip to content

Commit

Permalink
mimic sklearn weighted random init
Browse files Browse the repository at this point in the history
  • Loading branch information
fcharras committed Aug 1, 2023
1 parent ef49037 commit bc2e287
Showing 1 changed file with 4 additions and 7 deletions.
11 changes: 4 additions & 7 deletions sklearn_numba_dpex/kmeans/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,16 +188,13 @@ def init_centroids(self, X, sample_weight):
else:
# NB: sampling without replacement must be executed sequentially so
# it's better done on CPU
sample_weight_numpy = dpt.asnumpy(sample_weight)
p = sample_weight_numpy / sample_weight_numpy.sum()
centers_idx = self.random_state.choice(
X.shape[0], size=n_clusters, replace=False
X.shape[0], size=n_clusters, replace=False, p=p
)
# Poor man's fancy indexing
# TODO: write a kernel ? or replace with better equivalent when available ?
# Relevant issue: https://github.com/IntelPython/dpctl/issues/1003
centers_t = dpt.concat(
[dpt.expand_dims(X[center_idx], axis=1) for center_idx in centers_idx],
axis=1,
)
centers_t = dpt.take(X.T, dpt.asarray(centers_idx), axis=1)

return centers_t

Expand Down

0 comments on commit bc2e287

Please sign in to comment.