skrub-data · mrastgoo · Dec 7, 2024 · Dec 7, 2024 · Dec 7, 2024 · Dec 7, 2024
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -29,6 +29,10 @@ Release 0.4.1
 
 Changes
 -------
+* :class:`TableReport` has a new ``write_html`` method to store the report in an HTML file.
+  :pr:`1190` by :user:`Mojdeh Rastgoo<mrastgoo>`.
+
 * A new parameter ``verbose`` has been added to the :class:`TableReport` to toggle on or off the
   printing of progress information when a report is being generated.
   :pr:`1182` by :user:`Priscilla Baah<priscilla-b>`.

diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py
@@ -1,5 +1,8 @@
+import codecs
 import functools
 import json
+import locale
+from pathlib import Path
 
 from ._html import to_html
 from ._serve import open_in_browser
@@ -197,6 +200,43 @@
     def _repr_html_(self):
         return self._repr_mimebundle_()["text/html"]
 
+    def write_html(self, file):
+        """Store the report into an HTML file.
+
+        Parameters
+        ----------
+        file : str, pathlib.Path or file object
+            The file object or path of the file to store the HTML output.
+            A path is opened for writing with UTF-8 encoding. A file object
+            can be opened in binary mode, or in text mode provided that it
+            uses UTF-8 encoding.
+
+        Raises
+        ------
+        ValueError
+            If ``file`` is a text-mode file object whose encoding is not
+            UTF-8, or if it declares no encoding and the platform encoding is
+            not UTF-8.
+        """
+
+        def is_utf8(name):
+            # Compare normalized codec names so that aliases such as "UTF8"
+            # or "utf_8" are accepted; unknown codec names are not UTF-8.
+            try:
+                return codecs.lookup(name).name == "utf-8"
+            except LookupError:
+                return False
+
+        html = self.html()
+        if isinstance(file, (str, Path)):
+            with open(file, "w", encoding="utf8") as stream:
+                stream.write(html)
+            return
+
+        try:
+            # The mode of the file object is unknown: try writing bytes first.
+            file.write(html.encode("utf-8"))
+            return
+        except TypeError:
+            # Text-mode file objects reject bytes; fall through and write str
+            # after checking the encoding.
+            pass
+
+        encoding = getattr(file, "encoding", None)
+        if encoding is not None:
+            # Explicit check rather than `assert`, which is removed under -O.
+            if not is_utf8(encoding):
+                raise ValueError(
+                    "If `file` is a text file it should use utf-8 encoding; got:"
+                    f" {encoding!r}"
+                )
+        else:
+            # When encoding is None, the platform-specific encoding is assumed
+            # to apply; raise if it is not utf-8.
+            # locale.getencoding() only exists on Python >= 3.11; fall back to
+            # locale.getpreferredencoding(False) on older versions.
+            get_encoding = getattr(locale, "getencoding", None)
+            if get_encoding is not None:
+                platform_encoding = get_encoding()
+            else:
+                platform_encoding = locale.getpreferredencoding(False)
+            if not is_utf8(platform_encoding):
+                raise ValueError(
+                    f"Platform encoding is not utf-8; got {platform_encoding}"
+                )
+
+        file.write(html)
+
     def open(self):
         """Open the HTML report in a web browser."""
         open_in_browser(self.html())
diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py
@@ -2,6 +2,9 @@
 import json
 import re
 import warnings
+from pathlib import Path
+
+import pytest
 
 from skrub import TableReport, ToDatetime
 from skrub import _dataframe as sbd
@@ -123,6 +126,49 @@ def test_duration(df_module):
     assert re.search(r"2(\.0)?\s+days", TableReport(df).html())
 
 
+@pytest.mark.parametrize(
+    "filename_type",
+    ["str", "Path", "file_object", "binary_mode"],
+)
+def test_write_html(tmp_path, pd_module, filename_type):
+    """Check that ``write_html`` accepts paths and open file objects."""
+    df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]})
+    report = TableReport(df)
+
+    tmp_file_path = tmp_path / Path("report.html")
+
+    # Only the file-object cases open a handle; keep track of it so that it is
+    # closed (and therefore flushed) before the content is read back.
+    opened_file = None
+    if filename_type == "str":
+        filename = str(tmp_file_path)
+    elif filename_type == "file_object":
+        filename = opened_file = open(tmp_file_path, "w", encoding="utf-8")
+    elif filename_type == "binary_mode":
+        filename = opened_file = open(tmp_file_path, "wb")
+    else:
+        filename = tmp_file_path
+
+    try:
+        report.write_html(filename)
+    finally:
+        if opened_file is not None:
+            opened_file.close()
+    assert tmp_file_path.exists()
+
+    with open(tmp_file_path, "r", encoding="utf-8") as file:
+        saved_content = file.read()
+    assert "</html>" in saved_content
+
+
+def test_write_html_with_not_utf8_encoding(tmp_path, pd_module):
+    """Check that a text file object that is not UTF-8 encoded is rejected."""
+    df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]})
+    report = TableReport(df)
+    tmp_file_path = tmp_path / Path("report.html")
+
+    # The context manager guarantees the handle is closed even if the
+    # expected error is not raised.
+    with open(tmp_file_path, "w", encoding="latin-1") as file:
+        encoding = getattr(file, "encoding", None)
+        with pytest.raises(
+            ValueError,
+            match=(
+                "If `file` is a text file it should use utf-8 encoding; got:"
+                f" {encoding!r}"
+            ),
+        ):
+            report.write_html(file)
+
+    # open() created the file, so it exists; the rejected call must not have
+    # written anything into it.
+    assert tmp_file_path.read_text(encoding="latin-1") == ""
+
+
 def test_verbosity_parameter(df_module, capsys):
     df = df_module.make_dataframe(
         dict(