Skip to content

Commit

Permalink
owtsne: Ensure data table-only settings properly restored
Browse files Browse the repository at this point in the history
  • Loading branch information
pavlin-policar committed Sep 22, 2023
1 parent ef865df commit 416707a
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 14 deletions.
25 changes: 11 additions & 14 deletions Orange/widgets/unsupervised/owtsne.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,11 +437,11 @@ def _add_controls_start_box(self):
self.preprocessing_box = gui.vBox(self.controlArea, box="Preprocessing")
self.normalize_cbx = gui.checkBox(
self.preprocessing_box, self, "normalize", "Normalize data",
callback=self._invalidate_normalized_data,
callback=self._invalidate_normalized_data, stateWhenDisabled=False,
)
self.pca_preprocessing_cbx = gui.checkBox(
self.preprocessing_box, self, "use_pca_preprocessing", "Apply PCA preprocessing",
callback=self._pca_preprocessing_changed,
callback=self._pca_preprocessing_changed, stateWhenDisabled=False,
)
self.pca_component_slider = gui.hSlider(
self.preprocessing_box, self, "pca_components", label="PCA Components:",
Expand Down Expand Up @@ -796,21 +796,18 @@ def enable_controls(self):
form.labelForField(self.distance_metric_combo).setDisabled(False)

if has_distance_matrix:
self.normalize = False
self.normalize_cbx.setDisabled(True)
self.normalize_cbx.setToolTip(
"Precomputed distances provided. Preprocessing is unnecessary!"
)

self.use_pca_preprocessing = False
self.pca_preprocessing_cbx.setDisabled(True)
self.pca_preprocessing_cbx.setToolTip(
"Precomputed distances provided. Preprocessing is unnecessary!"
)

# Only spectral init is valid with a precomputed distance matrix
spectral_init_idx = self.initialization_combo.findText("Spectral")
self.initialization_method_idx = spectral_init_idx
self.initialization_combo.setCurrentIndex(spectral_init_idx)
self.initialization_combo.setDisabled(True)
self.initialization_combo.setToolTip(
Expand All @@ -829,7 +826,6 @@ def enable_controls(self):
# PCA doesn't support normalization on sparse data, as this would
# require centering and normalizing the matrix
if not has_distance_matrix and has_data and self.data.is_sparse():
self.normalize = False
self.normalize_cbx.setDisabled(True)
self.normalize_cbx.setToolTip(
"Data normalization is not supported on sparse matrices."
Expand Down Expand Up @@ -872,9 +868,9 @@ def run(self):

initialization_method = INITIALIZATIONS[self.initialization_method_idx][1]
distance_metric = DISTANCE_METRICS[self.distance_metric_idx][1]

if self.distance_matrix is not None:
distance_metric = "precomputed"
initialization_method = "spectral"

task = Task(
data=self.data,
Expand All @@ -901,26 +897,28 @@ def run(self):
return self.start(TSNERunner.run, task)

def __ensure_task_same_for_normalization(self, task: Task):
assert task.data is self.data
assert task.normalize == self.normalize
if task.normalize:
if task.normalize and task.distance_metric != "precomputed":
assert task.data is self.data
assert isinstance(task.normalized_data, Table) and \
len(task.normalized_data) == len(self.data)

def __ensure_task_same_for_pca(self, task: Task):
assert task.data is self.data
assert task.use_pca_preprocessing == self.use_pca_preprocessing
if task.use_pca_preprocessing:
if task.use_pca_preprocessing and task.distance_metric != "precomputed":
assert task.data is self.data
assert task.pca_components == self.pca_components
assert isinstance(task.pca_projection, Table) and \
len(task.pca_projection) == len(self.data)

def __ensure_task_same_for_initialization(self, task: Task):
initialization_method = INITIALIZATIONS[self.initialization_method_idx][1]
assert task.initialization_method == initialization_method
if self.distance_matrix is not None:
n_samples = self.distance_matrix.shape[0]
else:
initialization_method = INITIALIZATIONS[self.initialization_method_idx][1]
# If distance matrix is provided, the control value will be set to
# whatever it was from the context, but we will use `spectral`
assert task.initialization_method == initialization_method
assert self.data is not None
n_samples = self.data.X.shape[0]
assert isinstance(task.initialization, np.ndarray) and \
Expand Down Expand Up @@ -1054,7 +1052,6 @@ def migrate_context(cls, context, version):
from Orange.distance import Euclidean
dist_matrix = Euclidean(data, normalize=True)
WidgetPreview(OWtSNE).run(
# set_data=data,
set_distances=dist_matrix,
set_subset_data=data[np.random.choice(len(data), 10)],
)
55 changes: 55 additions & 0 deletions Orange/widgets/unsupervised/tests/test_owtsne.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,61 @@ def test_controls_are_properly_disabled_with_distance_matrix_2(self):
for field in disabled_fields:
self.assertTrue(getattr(w.controls, field).isEnabled())

def test_controls_ignored_by_distance_matrix_retain_values_on_table_signal(self):
    """Data-only settings must survive a round trip through a distance
    matrix signal.

    The controls for `normalize`, `pca_preprocessing`, `metric`, and
    `initialization` are overridden/ignored when using a distance matrix
    signal. However, we want to remember their values when using Data
    table signals.

    The scenario is: configure the widget on a data table, disconnect it,
    connect and disconnect a distance matrix, then reconnect the same data
    table and check that the values chosen in the first step are back.
    `perplexity` applies to both kinds of input and is checked in both
    phases.
    """
    w = self.widget

    # Phase 1: configure the widget while a data table is connected
    self.send_signal(w.Inputs.data, self.iris)
    w.normalize_cbx.setChecked(True)
    w.pca_preprocessing_cbx.setChecked(True)
    w.pca_component_slider.setValue(3)
    w.initialization_combo.setCurrentIndex(0)
    w.distance_metric_combo.setCurrentIndex(2)
    w.perplexity_spin.setValue(42)
    # Disconnect data, save context settings
    self.send_signal(w.Inputs.data, None)

    # Phase 2: send distances signal; data-only controls get disabled
    self.send_signal(w.Inputs.distances, self.iris_distances)

    self.assertFalse(w.normalize_cbx.isEnabled())

    self.assertFalse(w.pca_preprocessing_cbx.isEnabled())
    self.assertFalse(w.pca_component_slider.isEnabled())

    self.assertFalse(w.initialization_combo.isEnabled())
    self.assertEqual(w.initialization_combo.currentText(), "Spectral")

    self.assertFalse(w.distance_metric_combo.isEnabled())
    self.assertEqual(w.distance_metric_combo.currentText(), "")

    self.assertTrue(w.perplexity_spin.isEnabled())
    self.assertEqual(w.perplexity_spin.value(), 42)
    # Disconnect signal, the context settings should not be overridden
    self.send_signal(w.Inputs.distances, None)

    # Phase 3: send data signal, the data-only settings should be restored
    self.send_signal(w.Inputs.data, self.iris)

    self.assertTrue(w.normalize_cbx.isEnabled())
    self.assertTrue(w.normalize_cbx.isChecked())

    self.assertTrue(w.pca_preprocessing_cbx.isEnabled())
    self.assertTrue(w.pca_preprocessing_cbx.isChecked())
    self.assertTrue(w.pca_component_slider.isEnabled())

    self.assertTrue(w.initialization_combo.isEnabled())
    # Must be assertEqual: assertTrue(text, "PCA") would treat "PCA" as the
    # failure message and pass for any non-empty combo text.
    self.assertEqual(w.initialization_combo.currentText(), "PCA")

    self.assertTrue(w.distance_metric_combo.isEnabled())
    self.assertEqual(w.distance_metric_combo.currentIndex(), 2)

    self.assertTrue(w.perplexity_spin.isEnabled())
    self.assertEqual(w.perplexity_spin.value(), 42)


class TestTSNERunner(unittest.TestCase):
@classmethod
Expand Down

0 comments on commit 416707a

Please sign in to comment.