diff --git a/CHANGES.rst b/CHANGES.rst
index fea1d2e94..77e5b56f1 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -40,6 +40,10 @@ Bug fixes
   :user:`Jérôme Dockès <jeromedockes>` and the matplotlib issue can be tracked
   [here](https://github.com/matplotlib/matplotlib/issues/25041).
 
+* The labels on bar plots in the ``TableReport`` for columns of object dtypes
+  that have a repr spanning multiple lines could be unreadable. This has been
+  fixed in :pr:`1196` by :user:`Jérôme Dockès <jeromedockes>`.
+
 * Improve the performance of :func:`deduplicate` by removing some unnecessary
   computations. :pr:`1193` by :user:`Jérôme Dockès <jeromedockes>`.
 
diff --git a/skrub/_reporting/_utils.py b/skrub/_reporting/_utils.py
index b5f0e802b..c2962fc72 100644
--- a/skrub/_reporting/_utils.py
+++ b/skrub/_reporting/_utils.py
@@ -43,8 +43,8 @@ def quantiles(column):
 
 def ellide_string(s, max_len=30):
     """Shorten a string so it can be used as a plot axis title or label."""
-    if not isinstance(s, str):
-        return s
+    s = str(s)
+
     # normalize whitespace
     s = re.sub(r"\s+", " ", s)
     if len(s) <= max_len:
diff --git a/skrub/_reporting/tests/test_utils.py b/skrub/_reporting/tests/test_utils.py
index 10f7c3e82..06c4759e4 100644
--- a/skrub/_reporting/tests/test_utils.py
+++ b/skrub/_reporting/tests/test_utils.py
@@ -11,7 +11,7 @@
 @pytest.mark.parametrize(
     "s_in, s_out",
     [
-        (1, 1),
+        (1, "1"),
         ("aa", "aa"),
         ("a\na", "a a"),
         ("a" * 70, "a" * 30 + "…\u200e"),
@@ -55,6 +55,16 @@ def test_ellide_string_empty():
     assert _utils.ellide_string(" a", 1) == "…"
 
 
+def test_ellide_non_string():
+    # non-regression for #1195: objects in columns must be converted to strings
+    # before elliding and plotting
+    class A:
+        def __repr__(self):
+            return "one\ntwo\nthree"
+
+    assert _utils.ellide_string(A()) == "one two three"
+
+
 @pytest.mark.parametrize(
     "n_in, n_out",
     [