From a9a3291e73773e38a403513020a7e680fe954d55 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 25 Feb 2022 11:38:39 +0100 Subject: [PATCH 1/3] SOM: Fix crash when color is constant --- Orange/widgets/unsupervised/owsom.py | 13 +++++++++++++ Orange/widgets/unsupervised/tests/test_owsom.py | 9 +++++++++ 2 files changed, 22 insertions(+) diff --git a/Orange/widgets/unsupervised/owsom.py b/Orange/widgets/unsupervised/owsom.py index 65cdda50f5d..faebc1d5aab 100644 --- a/Orange/widgets/unsupervised/owsom.py +++ b/Orange/widgets/unsupervised/owsom.py @@ -833,6 +833,17 @@ def set_color_bins(self): binning = decimal_binnings(col, min_bins=4)[-1] self.thresholds = binning.thresholds[1:-1] self.bin_labels = (binning.labels[1:-1], binning.short_labels[1:-1]) + if not self.bin_labels[0] and binning.labels: + # Nan's are already filtered out, but it doesn't hurt much + # to use nanmax/nanmin + if np.nanmin(col) == np.nanmax(col): + # Handle a degenerate case with a single value + # Use the second threshold (because value must be smaller), + # but the first threshold as label (because that's the + # actual value in the data. + self.thresholds = binning.thresholds[1:] + self.bin_labels = (binning.labels[:1], + binning.short_labels[:1]) palette = BinnedContinuousPalette.from_palette( self.attr_color.palette, binning.thresholds) self.colors = palette @@ -871,6 +882,8 @@ def create_legend(self): def _bin_names(self): labels, short_labels = self.bin_labels + if len(labels) <= 1: + return labels return \ [f"< {labels[0]}"] \ + [f"{x} - {y}" for x, y in zip(labels, short_labels[1:])] \ diff --git a/Orange/widgets/unsupervised/tests/test_owsom.py b/Orange/widgets/unsupervised/tests/test_owsom.py index 7d6630b389b..847caffbe9b 100644 --- a/Orange/widgets/unsupervised/tests/test_owsom.py +++ b/Orange/widgets/unsupervised/tests/test_owsom.py @@ -8,6 +8,7 @@ from Orange.data import Table, Domain from Orange.widgets.tests.base import WidgetTest +from Orange.widgets.tests.utils import simulate from Orange.widgets.utils.annotated_data import ANNOTATED_DATA_FEATURE_NAME from Orange.widgets.unsupervised.owsom import OWSOM, SomView, SOM @@ -220,6 +221,14 @@ def test_attr_color_change(self): self.assertIsNotNone(widget.thresholds) widget._redraw.assert_called() + def test_colored_circles_with_constant(self): + with self.iris.unlocked(): + self.iris.X[:, 0] = 1 + self.send_signal(self.widget.Inputs.data, self.iris) + combo = self.widget.controls.attr_color + simulate.combobox_activate_index( + combo, combo.model().indexOf(self.iris.domain.attributes[0])) + @_patch_recompute_som def test_cell_sizes(self): widget = self.widget From f07d5970b3d2b39aac3fc04a9952b7041d45b8e4 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 25 Feb 2022 15:28:56 +0100 Subject: [PATCH 2/3] SOM: Fix crash when color is constantly nan --- Orange/widgets/unsupervised/owsom.py | 89 ++++++++++++------- .../widgets/unsupervised/tests/test_owsom.py | 32 ++++++- 2 files changed, 87 insertions(+), 34 deletions(-) diff --git a/Orange/widgets/unsupervised/owsom.py b/Orange/widgets/unsupervised/owsom.py index faebc1d5aab..f6d748fc7ac 100644 --- a/Orange/widgets/unsupervised/owsom.py +++ b/Orange/widgets/unsupervised/owsom.py @@ -1,4 +1,5 @@ from collections import defaultdict, namedtuple +from typing import Optional from xml.sax.saxutils import escape import numpy as np @@ -26,7 +27,7 @@ from Orange.widgets.utils.annotated_data import \ create_annotated_table, create_groups_table, ANNOTATED_DATA_SIGNAL_NAME from Orange.widgets.utils.colorpalettes import \ - BinnedContinuousPalette, LimitedDiscretePalette + BinnedContinuousPalette, LimitedDiscretePalette, DiscretePalette from Orange.widgets.visualize.utils import CanvasRectangle, CanvasText from Orange.widgets.visualize.utils.plotutils import wrap_legend_items @@ -210,6 +211,8 @@ class Warning(OWWidget.Warning): ignoring_disc_variables = Msg("SOM ignores categorical variables.") missing_colors = \ Msg("Some data instances have undefined value of '{}'.") + no_defined_colors = \ + Msg("'{}' has no defined values.") missing_values = \ Msg("{} data instance{} with undefined value(s) {} not shown.") single_attribute = Msg("Data contains a single numeric column.") @@ -228,7 +231,12 @@ def __init__(self): self.data = self.cont_x = None self.cells = self.member_data = None self.selection = None - self.colors = self.thresholds = self.bin_labels = None + + # self.colors holds a palette or None when we need to draw same-colored + # circles. This happens by user's choice or when the color attribute + # is numeric and has no defined values, so we can't construct bins + self.colors: Optional[DiscretePalette] = None + self.thresholds = self.bin_labels = None box = gui.vBox(self.controlArea, box="SOM") shape = gui.comboBox( @@ -536,7 +544,7 @@ def _redraw(self): self.elements = QGraphicsItemGroup() self.scene.addItem(self.elements) - if self.attr_color is None: + if self.colors is None: self._draw_same_color(sizes) elif self.pie_charts: self._draw_pie_charts(sizes) @@ -562,6 +570,10 @@ def _draw_same_color(self, sizes): self.elements.addToGroup(ellipse) def _get_color_column(self): + # if self.colors is None, we use _draw_same_color and don't call + # this function + assert self.colors is not None + color_column = \ self.data.get_column_view(self.attr_color)[0].astype(float, copy=False) @@ -571,10 +583,7 @@ def _get_color_column(self): int_col[np.isnan(color_column)] = len(self.colors) else: int_col = np.zeros(len(color_column), dtype=int) - # The following line is unnecessary because rows with missing - # numeric data are excluded. Uncomment it if you change SOM to - # tolerate missing values. - # int_col[np.isnan(color_column)] = len(self.colors) + int_col[np.isnan(color_column)] = len(self.colors) for i, thresh in enumerate(self.thresholds, start=1): int_col[color_column >= thresh] = i return int_col @@ -584,6 +593,7 @@ def _tooltip(self, colors, distribution): values = self.attr_color.values else: values = self._bin_names() + values = list(values) + ["(N/A)"] tot = np.sum(distribution) nbhp = "\N{NON-BREAKING HYPHEN}" return '' + "".join(f""" @@ -600,6 +610,8 @@ def _tooltip(self, colors, distribution): + "
" def _draw_pie_charts(self, sizes): + assert self.colors is not None # if it were, we'd call _draw_same_color + fx, fy = self._grid_factors color_column = self._get_color_column() colors = self.colors.qcolors_w_nan @@ -619,6 +631,8 @@ def _draw_pie_charts(self, sizes): pie.setPos(x + (y % 2) * fx, y * fy) def _draw_colored_circles(self, sizes): + assert self.colors is not None # if it were, we'd call _draw_same_color + fx, fy = self._grid_factors color_column = self._get_color_column() qcolors = self.colors.qcolors_w_nan @@ -820,39 +834,50 @@ def update_output(self): self.Outputs.annotated_data.send(annotated) def set_color_bins(self): + self.Warning.no_defined_colors.clear() + if self.attr_color is None: self.thresholds = self.bin_labels = self.colors = None - elif self.attr_color.is_discrete: + return + + if self.attr_color.is_discrete: self.thresholds = self.bin_labels = None self.colors = self.attr_color.palette + return + + col = self.data.get_column_view(self.attr_color)[0].astype(float) + col = col[np.isfinite(col)] + if not col.size: + self.Warning.no_defined_colors(self.attr_color) + self.thresholds = self.bin_labels = self.colors = None + return + + if self.attr_color.is_time: + binning = time_binnings(col, min_bins=4)[-1] else: - col = self.data.get_column_view(self.attr_color)[0].astype(float) - if self.attr_color.is_time: - binning = time_binnings(col, min_bins=4)[-1] - else: - binning = decimal_binnings(col, min_bins=4)[-1] - self.thresholds = binning.thresholds[1:-1] - self.bin_labels = (binning.labels[1:-1], binning.short_labels[1:-1]) - if not self.bin_labels[0] and binning.labels: - # Nan's are already filtered out, but it doesn't hurt much - # to use nanmax/nanmin - if np.nanmin(col) == np.nanmax(col): - # Handle a degenerate case with a single value - # Use the second threshold (because value must be smaller), - # but the first threshold as label (because that's the - # actual value in the data. - self.thresholds = binning.thresholds[1:] - self.bin_labels = (binning.labels[:1], - binning.short_labels[:1]) - palette = BinnedContinuousPalette.from_palette( - self.attr_color.palette, binning.thresholds) - self.colors = palette + binning = decimal_binnings(col, min_bins=4)[-1] + self.thresholds = binning.thresholds[1:-1] + self.bin_labels = (binning.labels[1:-1], binning.short_labels[1:-1]) + if not self.bin_labels[0] and binning.labels: + # Nan's are already filtered out, but it doesn't hurt much + # to use nanmax/nanmin + if np.nanmin(col) == np.nanmax(col): + # Handle a degenerate case with a single value + # Use the second threshold (because value must be smaller), + # but the first threshold as label (because that's the + # actual value in the data. + self.thresholds = binning.thresholds[1:] + self.bin_labels = (binning.labels[:1], + binning.short_labels[:1]) + palette = BinnedContinuousPalette.from_palette( + self.attr_color.palette, binning.thresholds) + self.colors = palette def create_legend(self): if self.legend is not None: self.scene.removeItem(self.legend) self.legend = None - if self.attr_color is None: + if self.colors is None: return if self.attr_color.is_discrete: @@ -881,7 +906,7 @@ def create_legend(self): self.set_legend_pos() def _bin_names(self): - labels, short_labels = self.bin_labels + labels, short_labels = self.bin_labels or ([], []) if len(labels) <= 1: return labels return \ @@ -898,7 +923,7 @@ def set_legend_pos(self): def send_report(self): self.report_plot() - if self.attr_color: + if self.colors: self.report_caption( f"Self-organizing map colored by '{self.attr_color.name}'") diff --git a/Orange/widgets/unsupervised/tests/test_owsom.py b/Orange/widgets/unsupervised/tests/test_owsom.py index 847caffbe9b..f9ebe851406 100644 --- a/Orange/widgets/unsupervised/tests/test_owsom.py +++ b/Orange/widgets/unsupervised/tests/test_owsom.py @@ -222,12 +222,40 @@ def test_attr_color_change(self): widget._redraw.assert_called() def test_colored_circles_with_constant(self): + domain = self.iris.domain + self.widget.pie_charts = False + with self.iris.unlocked(): self.iris.X[:, 0] = 1 self.send_signal(self.widget.Inputs.data, self.iris) + attr0 = domain.attributes[0] + combo = self.widget.controls.attr_color - simulate.combobox_activate_index( - combo, combo.model().indexOf(self.iris.domain.attributes[0])) + simulate.combobox_activate_index(combo, combo.model().indexOf(attr0)) + self.assertIsNotNone(self.widget.colors) + self.assertFalse(self.widget.Warning.no_defined_colors.is_shown()) + + dom1 = Domain(domain.attributes[1:], domain.class_var, + domain.attributes[:1]) + iris = self.iris.transform(dom1).copy() + with iris.unlocked(iris.metas): + iris.metas[::2, 0] = np.nan + self.send_signal(self.widget.Inputs.data, iris) + simulate.combobox_activate_index(combo, combo.model().indexOf(attr0)) + self.assertIsNotNone(self.widget.colors) + self.assertFalse(self.widget.Warning.no_defined_colors.is_shown()) + + iris = self.iris.transform(dom1).copy() + with iris.unlocked(iris.metas): + iris.metas[:, 0] = np.nan + self.send_signal(self.widget.Inputs.data, iris) + simulate.combobox_activate_index(combo, combo.model().indexOf(attr0)) + self.assertIsNone(self.widget.colors) + self.assertTrue(self.widget.Warning.no_defined_colors.is_shown()) + + simulate.combobox_activate_index(combo, 0) + self.assertIsNone(self.widget.colors) + self.assertFalse(self.widget.Warning.no_defined_colors.is_shown()) @_patch_recompute_som def test_cell_sizes(self): From c00df78de94e82b609bb2a07037bde80a0f62d49 Mon Sep 17 00:00:00 2001 From: janezd Date: Wed, 9 Mar 2022 17:30:33 +0100 Subject: [PATCH 3/3] time_binnings: store thresholds as np.array --- Orange/preprocess/discretize.py | 2 +- Orange/preprocess/tests/test_discretize.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Orange/preprocess/discretize.py b/Orange/preprocess/discretize.py index 1502b97f0f4..ba84b1ec424 100644 --- a/Orange/preprocess/discretize.py +++ b/Orange/preprocess/discretize.py @@ -386,7 +386,7 @@ def _time_binnings(mn, mx, min_pts, max_pts): if not times: continue times = [time.struct_time(t + (0, 0, 0)) for t in times] - thresholds = [calendar.timegm(t) for t in times] + thresholds = np.array([calendar.timegm(t) for t in times]) labels = [time.strftime(fmt, t) for t in times] short_labels = _simplified_labels(labels) if place == 2 and step >= 7: diff --git a/Orange/preprocess/tests/test_discretize.py b/Orange/preprocess/tests/test_discretize.py index 2ad7ad1cbb6..e292d66242f 100644 --- a/Orange/preprocess/tests/test_discretize.py +++ b/Orange/preprocess/tests/test_discretize.py @@ -39,7 +39,8 @@ def tr(ss): def testbin(start, end): bins = _time_binnings(create(*start), create(*end), 3, 51) - return [(bin.width_label, tr(bin.short_labels), bin.thresholds) + return [(bin.width_label, tr(bin.short_labels), + list(bin.thresholds)) for bin in reversed(bins)] self.assertEqual(