Skip to content

Commit

Permalink
Adding more test cases to HDBSCAN
Browse files: browse the repository at this point in the history.
Signed-off-by: Julio Faracco <[email protected]>
  • Loading branch information
jcfaracco committed Jul 17, 2024
1 parent 8f53d9d commit beb9b70
Showing 1 changed file with 85 additions and 10 deletions.
95 changes: 85 additions & 10 deletions tests/ml/cluster/test_hdbscan.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,35 +3,80 @@
import unittest
import numpy as np

from parameterized import parameterized_class

try:
import cupy as cp
except ImportError:
pass

from sklearn.datasets import make_blobs

try:
from dasf.ml.cluster import HDBSCAN
except ImportError:
raise unittest.SkipTest("HDBSCAN is probably affected by CVE-2022-21797")
from sklearn.datasets import make_blobs, make_moons

from dasf.ml.cluster import HDBSCAN
from dasf.utils.types import is_cpu_array
from dasf.utils.types import is_gpu_array
from dasf.utils.funcs import is_gpu_supported


def generate_blobs():
    """Build the datasets used to parameterize the CPU/GPU comparison tests.

    Returns a list of dicts, one per dataset, each with the keys ``'X'``
    (samples), ``'y'`` (labels) and ``'centroids'`` (the generating centers,
    or an empty list for the moons dataset, which has none).
    """
    # Keyword arguments for each make_blobs() dataset, in output order.
    blob_settings = (
        {'n_samples': 1000, 'centers': 3, 'n_features': 2, 'random_state': 42},
        {'n_samples': 1000, 'random_state': 30},
        # Disabled case, kept for reference (it was tagged with 'id': 3):
        # {'n_samples': 4000,
        #  'centers': [(-0.75, 2.25),
        #              (1.0, 2.0),
        #              (1.0, 1.0),
        #              (2.0, -0.5),
        #              (-1.0, -1.0),
        #              (0.0, 0.0)],
        #  'cluster_std': 0.5,
        #  'random_state': 12},
        {'n_samples': 2000, 'n_features': 10, 'random_state': 10},
    )

    cases = []
    for settings in blob_settings:
        samples, labels, centers = make_blobs(return_centers=True, **settings)
        cases.append({'X': samples, 'y': labels, 'centroids': centers})

    # Two interleaving half circles: a non-convex shape with no centroids.
    moon_samples, moon_labels = make_moons(n_samples=3000,
                                           noise=0.1,
                                           random_state=42)
    cases.append({'X': moon_samples, 'y': moon_labels, 'centroids': []})

    return cases


class TestHDBSCAN(unittest.TestCase):
def setUp(self):
    """Create the labelled blob dataset shared by the tests in this class.

    Stores the samples in ``self.X``, the labels in ``self.y`` and the
    generating centers in ``self.centroids``.
    """
    # Kept as attributes so any test that reads them keeps working.
    self.size = 1000
    self.centers = 3
    # Fixed seed so every run is generated from the same data.
    random_state = 42

    self.X, self.y, self.centroids = make_blobs(
        n_samples=self.size,
        centers=self.centers,
        n_features=2,
        return_centers=True,
        random_state=random_state,
    )


def __match_randomly_labels_created(self, y1, y2):
y2 = (y2 * -1) - 1

Expand Down Expand Up @@ -83,3 +128,33 @@ def test_hdbscan_gpu(self):
y1, y2 = self.__match_randomly_labels_created(y.get(), self.y)

self.assertTrue(float(len(np.where(y1 != y2)[0])/len(y1))*100 < 5.0)


@parameterized_class(generate_blobs())
class TestHDBSCANMatches(unittest.TestCase):
    """Check that the CPU and GPU HDBSCAN paths agree on each dataset.

    ``parameterized_class`` creates one copy of this class per dict returned
    by ``generate_blobs()``, exposing its keys (``X``, ``y``, ``centroids``)
    as class attributes.
    """

    def __match_randomly_labels_created(self, y1, y2):
        """Rename the cluster ids in ``y2`` to the ids used by ``y1``.

        Cluster ids are arbitrary, so two equivalent clusterings may number
        their clusters differently. Every non-negative label in ``y2`` is
        replaced by the ``y1`` label found at that cluster's first
        occurrence.

        Negative labels are left untouched: HDBSCAN implementations
        conventionally report noise as -1, and noise on one side should
        compare equal to noise on the other.

        Parameters:
            y1: reference labels; returned unchanged.
            y2: labels to rename; not modified in place. Assumed to have
                the same length as ``y1``.

        Returns:
            A ``(y1, renamed_y2)`` tuple.
        """
        source = np.asarray(y2)
        renamed = source.copy()

        # Always compare against the untouched `source`, so an id that was
        # already written into `renamed` can never be mistaken for a label
        # that still needs mapping.
        labels, first_seen = np.unique(source, return_index=True)
        for label, index in zip(labels, first_seen):
            if label < 0:
                continue
            renamed[source == label] = y1[index]

        return y1, renamed

    @unittest.skipIf(not is_gpu_supported(),
                     "not supported CUDA in this platform")
    def test_hdbscan(self):
        """CPU and GPU labels must differ on fewer than 5% of the samples."""
        sc_cpu = HDBSCAN()
        sc_gpu = HDBSCAN()

        cp_X = cp.asarray(self.X)

        y_cpu = sc_cpu._fit_predict_cpu(self.X)
        y_gpu = sc_gpu._fit_predict_gpu(cp_X)

        # .get() copies the GPU result back to a host NumPy array.
        y1, y2 = self.__match_randomly_labels_created(y_gpu.get(), y_cpu)

        mismatch_percent = float(np.count_nonzero(y1 != y2)) / len(y1) * 100
        self.assertLess(mismatch_percent, 5.0)

0 comments on commit beb9b70

Please sign in to comment.