Skip to content

Commit 94ccb0a

Browse files
OkuyanBogaFrancescaSchiavoscar-wallisedoaltamurasmens
authored
Add predict_proba Support to PegasosQSVC and NeuralNetworkClassifier (#871)
* Adding a predict_proba function to classifiers. (#57) * Update README.md * Predict proba for NNC and PegQSVC * Rewriting predict proba features and docstring It was very inefficient before and didn't have the validation checks needed. The code is now more clear and docstring has been added. * Tweak documentation for NNC and PegasosQSVC, silence lint E1101 on torch connector * Update test with `QNN.predict_proba` * Update test with `PegasosESVC.predict_proba` * Added a release note and solved conflicts with main --------- Co-authored-by: FrancescaSchiav <[email protected]> Co-authored-by: oscar-wallis <[email protected]> Co-authored-by: Edoardo Altamura <[email protected]> Co-authored-by: smens <[email protected]> * Reformatted docs * Fix usage of sklearn --------- Co-authored-by: FrancescaSchiav <[email protected]> Co-authored-by: oscar-wallis <[email protected]> Co-authored-by: Edoardo Altamura <[email protected]> Co-authored-by: smens <[email protected]>
1 parent cd7a332 commit 94ccb0a

File tree

5 files changed

+204
-34
lines changed

5 files changed

+204
-34
lines changed

qiskit_machine_learning/algorithms/classifiers/neural_network_classifier.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,23 +140,76 @@ def _create_objective(self, X: np.ndarray, y: np.ndarray) -> ObjectiveFunction:
140140
return function
141141

142142
def predict(self, X: np.ndarray) -> np.ndarray:
143-
self._check_fitted()
143+
"""
144+
Perform classification on samples in X.
145+
146+
Args:
147+
X (np.ndarray): Input features. For a callable kernel (an instance of
148+
:class:`~qiskit_machine_learning.kernels.BaseKernel`), the shape
149+
should be ``(m_samples, n_features)``. For a pre-computed kernel, the shape should be
150+
``(m_samples, n_samples)``. Here, ``m_*`` denotes the set to be
151+
predicted, and ``n_*`` denotes the size of the training set.
152+
In the case of a pre-computed kernel, the kernel values in ``X`` must be calculated
153+
with respect to the elements of the set to be predicted and the training set.
154+
155+
Returns:
156+
np.ndarray: An array of shape ``(n_samples,)``, representing the predicted class labels for
157+
each sample in ``X``.
144158
159+
Raises:
160+
QiskitMachineLearningError:
161+
- If the :meth:`predict` method is called before the model has been fit.
162+
ValueError:
163+
- If the pre-computed kernel matrix has the wrong shape and/or dimension.
164+
"""
165+
self._check_fitted()
145166
X, _ = self._validate_input(X)
146167

147168
if self._neural_network.output_shape == (1,):
148-
predict = np.sign(self._neural_network.forward(X, self._fit_result.x))
169+
# Binary classification
170+
raw_output = self._neural_network.forward(X, self._fit_result.x)
171+
predict = np.sign(raw_output)
149172
else:
173+
# Multi-class classification
150174
forward = self._neural_network.forward(X, self._fit_result.x)
151175
predict_ = np.argmax(forward, axis=1)
176+
152177
if self._one_hot:
178+
# Convert class indices to one-hot encoded format
153179
predict = np.zeros(forward.shape)
154180
for i, v in enumerate(predict_):
155181
predict[i, v] = 1
156182
else:
157183
predict = predict_
184+
158185
return self._validate_output(predict)
159186

187+
def predict_proba(self, X: np.ndarray) -> np.ndarray:
188+
"""
189+
Extracts the predicted probabilities for each class based on the output of a neural
190+
network.
191+
192+
Args:
193+
X (np.ndarray): Input features. For a callable kernel (an instance of
194+
:class:`~qiskit_machine_learning.kernels.BaseKernel`), the shape
195+
should be ``(m_samples, n_features)``. For a pre-computed kernel, the shape should be
196+
``(m_samples, n_samples)``. Here, ``m_*`` denotes the set to be
197+
predicted, and ``n_*`` denotes the size of the training set. In the case of a
198+
pre-computed kernel, the kernel values in ``X`` must be calculated with respect to
199+
the elements of the set to be predicted and the training set.
200+
201+
Returns:
202+
np.ndarray: An array of shape ``(n_samples, n_classes)`` representing the predicted class
203+
probabilities (in the range :math:`[0, 1]`) for each sample in ``X``.
204+
"""
205+
self._check_fitted()
206+
X, _ = self._validate_input(X)
207+
208+
# Assumes an activation function is applied within the forward method
209+
proba = self._neural_network.forward(X, self._fit_result.x)
210+
211+
return proba
212+
160213
def score(self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray | None = None) -> float:
161214
return ClassifierMixin.score(self, X, y, sample_weight)
162215

qiskit_machine_learning/algorithms/classifiers/pegasos_qsvc.py

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def fit(
203203

204204
self.fit_status_ = PegasosQSVC.FITTED
205205

206-
logger.debug("fit completed after %s", str(datetime.now() - t_0)[:-7])
206+
logger.debug("Fit completed after %s", str(datetime.now() - t_0)[:-7])
207207

208208
return self
209209

@@ -213,33 +213,62 @@ def predict(self, X: np.ndarray) -> np.ndarray:
213213
Perform classification on samples in X.
214214
215215
Args:
216-
X: Features. For a callable kernel (an instance of
217-
:class:`~qiskit_machine_learning.kernels.BaseKernel`) the shape
218-
should be ``(m_samples, n_features)``, for a precomputed kernel the shape should be
219-
``(m_samples, n_samples)``. Where ``m`` denotes the set to be predicted and ``n`` the
220-
size of the training set. In that case, the kernel values in X have to be calculated
221-
with respect to the elements of the set to be predicted and the training set.
216+
X (np.ndarray): Input features. For a callable kernel (an instance of
217+
:class:`~qiskit_machine_learning.kernels.BaseKernel`), the shape
218+
should be ``(m_samples, n_features)``. For a pre-computed kernel, the shape should be
219+
``(m_samples, n_samples)``. Here, ``m_*`` denotes the set to be
220+
predicted, and ``n_*`` denotes the size of the training set. In the case of a
221+
pre-computed kernel, the kernel values in ``X`` must be calculated with respect to
222+
the elements of the set to be predicted and the training set.
222223
223224
Returns:
224-
An array of the shape (n_samples), the predicted class labels for samples in X.
225+
np.ndarray: An array of shape ``(m_samples,)``, representing the predicted class labels for
226+
each sample in ``X``.
225227
226228
Raises:
227229
QiskitMachineLearningError:
228-
- predict is called before the model has been fit.
230+
- If the :meth:`predict` method is called before the model has been fit.
229231
ValueError:
230-
- Pre-computed kernel matrix has the wrong shape and/or dimension.
232+
- If the pre-computed kernel matrix has the wrong shape and/or dimension.
231233
"""
232234

233235
t_0 = datetime.now()
234236
values = self.decision_function(X)
235237
y = np.array([self._label_pos if val > 0 else self._label_neg for val in values])
236-
logger.debug("prediction completed after %s", str(datetime.now() - t_0)[:-7])
238+
logger.debug("Prediction completed after %s", str(datetime.now() - t_0)[:-7])
237239

238240
return y
239241

242+
def predict_proba(self, X: np.ndarray) -> np.ndarray:
243+
"""
244+
Extract class prediction probabilities. The decision function values are
245+
not bounded in the range :math:`[0, 1]`. Therefore, these values are
246+
converted into probabilities using the sigmoid activation
247+
function, which maps the real-valued outputs to the :math:`[0, 1]` range.
248+
249+
Args:
250+
X (np.ndarray): Input features. For a callable kernel (an instance of
251+
:class:`~qiskit_machine_learning.kernels.BaseKernel`), the shape
252+
should be ``(m_samples, n_features)``. For a pre-computed kernel, the shape should be
253+
``(m_samples, n_samples)``. Here, ``m_*`` denotes the set to be
254+
predicted, and ``n_*`` denotes the size of the training set. In the case of a
255+
pre-computed kernel, the kernel values in ``X`` must be calculated with respect to
256+
the elements of the set to be predicted and the training set.
257+
258+
Returns:
259+
np.ndarray: An array of shape ``(m_samples, 2)``, representing the predicted class
260+
probabilities (in the range :math:`[0, 1]`) for each sample in ``X``.
261+
"""
262+
values = self.decision_function(X)
263+
264+
probabilities = 1 / (1 + np.exp(-values)) # Sigmoid activation function
265+
probabilities = np.dstack((1 - probabilities, probabilities))[0]
266+
267+
return probabilities
268+
240269
def decision_function(self, X: np.ndarray) -> np.ndarray:
241270
"""
242-
Evaluate the decision function for the samples in X.
271+
Evaluate the decision function for the samples in ``X``.
243272
244273
Args:
245274
X: Features. For a callable kernel (an instance of
@@ -259,7 +288,7 @@ def decision_function(self, X: np.ndarray) -> np.ndarray:
259288
- Pre-computed kernel matrix has the wrong shape and/or dimension.
260289
"""
261290
if self.fit_status_ == PegasosQSVC.UNFITTED:
262-
raise QiskitMachineLearningError("The PegasosQSVC has to be fit first")
291+
raise QiskitMachineLearningError("The PegasosQSVC has to be fit first.")
263292
if np.ndim(X) != 2:
264293
raise ValueError("X has to be a 2D array")
265294
if self._precomputed and self._n_samples != X.shape[1]:
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
features:
2+
- |
3+
The :class:`~qiskit_machine_learning.algorithms.PegasosQSVC` and algorithms derived
4+
from :class:`~qiskit_machine_learning.algorithms.NeuralNetworkClassifier` now support the ``predict_proba`` method.
5+
This method can be utilized similarly to other `scikit-learn`-based algorithms.

test/algorithms/classifiers/test_neural_network_classifier.py

Lines changed: 74 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,58 @@ def parity(x):
160160
return qnn, num_inputs, ansatz.num_parameters
161161

162162
def _generate_data(self, num_inputs: int) -> tuple[np.ndarray, np.ndarray]:
163-
# construct data
163+
"""
164+
Generates synthetic data consisting of randomly generated features and binary labels.
165+
Each label is determined based on the sum of the corresponding feature values. If the sum of
166+
the feature values for a sample is less than or equal to 1, the label is 1. Otherwise, the
167+
label is 0.
168+
169+
Args:
170+
num_inputs (int): The number of features for each sample.
171+
172+
Returns:
173+
tuple[np.ndarray, np.ndarray]: A tuple containing two numpy arrays:
174+
- features: An array of shape ``(6, num_inputs)`` with randomly generated feature values.
175+
- labels: An array of shape ``(6,)`` with binary labels for each sample.
176+
"""
177+
# Fixed number of samples for consistency
178+
num_samples = 6
179+
180+
features = algorithm_globals.random.random((num_samples, num_inputs))
181+
182+
# Assign binary labels based on feature sums
183+
labels = (np.sum(features, axis=1) <= 1).astype(float)
184+
185+
return features, labels
186+
187+
def _generate_data_multiclass(self, num_inputs: int) -> tuple[np.ndarray, np.ndarray]:
188+
"""
189+
Generates synthetic data consisting of randomly generated features and 3 categorical labels.
190+
Each label is determined based on the sum of the corresponding feature values, assigned
191+
as follows:
192+
- Label 0.0 if the sum of features <= 0.5.
193+
- Label 1.0 if 0.5 < sum of features <= 1.0.
194+
- Label 2.0 if sum of features > 1.0.
195+
196+
Args:
197+
num_inputs (int): The number of features for each sample.
198+
199+
Returns:
200+
tuple[np.ndarray, np.ndarray]: A tuple containing two numpy arrays:
201+
- features: An array of shape ``(6, num_inputs)`` with randomly generated feature values.
202+
- labels: An array of shape ``(6,)`` with categorical labels (0, 1, or 2) for each
203+
sample.
204+
"""
205+
# Fixed number of samples for consistency
164206
num_samples = 6
207+
165208
features = algorithm_globals.random.random((num_samples, num_inputs))
166-
labels = 1.0 * (np.sum(features, axis=1) <= 1)
209+
210+
# Assign categorical labels based on feature sums
211+
sums = np.sum(features, axis=1)
212+
labels = np.full_like(sums, 2.0)
213+
labels[sums <= 0.5] = 0.0
214+
labels[(sums > 0.5) & (sums <= 1.0)] = 1.0
167215

168216
return features, labels
169217

@@ -247,8 +295,13 @@ def test_classifier_with_sampler_qnn_and_cross_entropy(self, opt):
247295
(False, "squared_error"),
248296
)
249297
def test_categorical_data(self, config):
250-
"""Test categorical labels using QNN"""
298+
"""
299+
Tests the QNN classifier with categorical labels.
251300
301+
Args:
302+
config (tuple): Configuration tuple containing whether to use one-hot
303+
encoding and the loss function.
304+
"""
252305
one_hot, loss = config
253306

254307
optimizer = L_BFGS_B(maxiter=5)
@@ -259,20 +312,29 @@ def test_categorical_data(self, config):
259312

260313
features, labels = self._generate_data(num_inputs)
261314
labels = labels.astype(str)
262-
# convert to categorical
315+
316+
# Convert to categorical labels
263317
labels[labels == "0.0"] = "A"
264318
labels[labels == "1.0"] = "B"
265319

266-
# fit to data
320+
# Fit classifier to the data
267321
classifier.fit(features, labels)
268322

269-
# score
323+
# Evaluate the classifier
270324
score = classifier.score(features, labels)
271325
self.assertGreater(score, 0.5)
272326

327+
# Predict a single sample
273328
predict = classifier.predict(features[0, :])
274329
self.assertIn(predict, ["A", "B"])
275330

331+
# Test predict_proba method
332+
probas = classifier.predict_proba(features)
333+
self.assertEqual(probas.shape, (6, 2))
334+
335+
for proba in probas:
336+
self.assertAlmostEqual(np.sum(proba), 1.0, places=5)
337+
276338
@idata(L1L2_ERRORS + ["cross_entropy"])
277339
def test_sparse_arrays(self, loss):
278340
"""Tests classifier with sparse arrays as features and labels."""
@@ -375,7 +437,7 @@ def test_binary_classification_with_multiclass_data(self):
375437
"""Test that trying to train a binary classifier with multiclass data raises an error."""
376438

377439
optimizer = L_BFGS_B(maxiter=5)
378-
qnn, num_inputs, num_parameters = self._create_sampler_qnn(output_shape=1)
440+
qnn, _, num_parameters = self._create_sampler_qnn(output_shape=1)
379441
classifier = self._create_classifier(
380442
qnn,
381443
num_parameters,
@@ -385,11 +447,10 @@ def test_binary_classification_with_multiclass_data(self):
385447

386448
# construct data
387449
num_samples = 3
388-
x = algorithm_globals.random.random((num_samples, num_inputs))
389-
y = np.asarray([0, 1, 2])
450+
features, labels = self._generate_data_multiclass(num_samples)
390451

391452
with self.assertRaises(QiskitMachineLearningError):
392-
classifier.fit(x, y)
453+
classifier.fit(features, labels)
393454

394455
def test_bad_binary_shape(self):
395456
"""Test that trying to train a binary classifier with misshaped data raises an error."""
@@ -435,6 +496,9 @@ def test_untrained(self):
435496
with self.assertRaises(QiskitMachineLearningError, msg="classifier.predict()"):
436497
classifier.predict(np.asarray([]))
437498

499+
with self.assertRaises(QiskitMachineLearningError, msg="classifier.predict_proba()"):
500+
classifier.predict_proba(np.asarray([]))
501+
438502
with self.assertRaises(QiskitMachineLearningError, msg="classifier.fit_result"):
439503
_ = classifier.fit_result
440504

test/algorithms/classifiers/test_pegasos_qsvc.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,34 @@ def setUp(self):
7070
self.label_test_4d = label_4d[15:]
7171

7272
def test_qsvc(self):
73-
"""Test PegasosQSVC"""
74-
qkernel = FidelityQuantumKernel(feature_map=self.feature_map)
75-
76-
pegasos_qsvc = PegasosQSVC(quantum_kernel=qkernel, C=1000, num_steps=self.tau)
77-
78-
pegasos_qsvc.fit(self.sample_train, self.label_train)
79-
score = pegasos_qsvc.score(self.sample_test, self.label_test)
80-
81-
self.assertEqual(score, 1.0)
73+
"""
74+
Test the Pegasos QSVC algorithm.
75+
"""
76+
quantum_kernel = FidelityQuantumKernel(feature_map=self.feature_map)
77+
classifier = PegasosQSVC(quantum_kernel=quantum_kernel, C=1000, num_steps=self.tau)
78+
classifier.fit(self.sample_train, self.label_train)
79+
80+
# Evaluate the model on the test data
81+
test_score = classifier.score(self.sample_test, self.label_test)
82+
self.assertEqual(test_score, 1.0)
83+
84+
# Expected predictions for the given test data
85+
predicted_labels = classifier.predict(self.sample_test)
86+
self.assertTrue(np.array_equal(predicted_labels, self.label_test))
87+
88+
# Test predict_proba method (normalization is imposed by definition)
89+
probas = classifier.predict_proba(self.sample_test)
90+
expected_probas = np.array(
91+
[
92+
[0.67722117, 0.32277883],
93+
[0.35775209, 0.64224791],
94+
[0.36540916, 0.63459084],
95+
[0.64419096, 0.35580904],
96+
[0.35864466, 0.64135534],
97+
]
98+
)
99+
self.assertEqual(probas.shape, (self.label_test.shape[0], 2))
100+
np.testing.assert_array_almost_equal(probas, expected_probas, decimal=5)
82101

83102
def test_decision_function(self):
84103
"""Test PegasosQSVC."""

0 commit comments

Comments
 (0)