skrub-data · mrastgoo · Dec 7, 2024 · Dec 7, 2024 · Dec 7, 2024 · Dec 7, 2024
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -29,6 +29,9 @@ Release 0.4.1
 
 Changes
 -------
+* :class:`TableReport` now has a ``write_html`` method to save the report in an HTML file.
+  :pr:`1190` by :user:`Mojdeh Rastgoo<mrastgoo>`.
+
 * A new parameter ``verbose`` has been added to the :class:`TableReport` to toggle on or off the
   printing of progress information when a report is being generated.
   :pr:`1182` by :user:`Priscilla Baah<priscilla-b>`.

diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py
@@ -1,5 +1,7 @@
+import codecs
 import functools
 import json
+from pathlib import Path
 
 from ._html import to_html
 from ._serve import open_in_browser
@@ -197,6 +199,42 @@ def _repr_mimebundle_(self, include=None, exclude=None):
     def _repr_html_(self):
         return self._repr_mimebundle_()["text/html"]
 
+    def write_html(self, file):
+        """Store the report into an HTML file, encoded as UTF-8.
+
+        Parameters
+        ----------
+        file : str, pathlib.Path or file object
+            The file object or path of the file to store the HTML output.
+
+        Raises
+        ------
+        ValueError
+            If `file` is a text-mode file object whose encoding is not UTF-8.
+        """
+        html = self.html()
+        if isinstance(file, (str, Path)):
+            Path(file).write_text(html, encoding="utf8")
+            return
+
+        try:
+            file.write(html.encode("utf-8"))  # mode is unknown: try bytes first
+            return
+        except TypeError:
+            pass  # text-mode file object: fall through and write a str
+
+        if (encoding := getattr(file, "encoding", None)) is not None:
+            try:
+                is_utf8 = codecs.lookup(encoding).name == "utf-8"
+            except LookupError:
+                is_utf8 = False
+            if not is_utf8:  # explicit check: `assert` is stripped by `python -O`
+                raise ValueError(
+                    "If `file` is a text file it should use utf-8 encoding; got:"
+                    f" {encoding!r}"
+                )
+        file.write(html)
+
     def open(self):
         """Open the HTML report in a web browser."""
         open_in_browser(self.html())
diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py
@@ -1,7 +1,11 @@
+import contextlib
 import datetime
 import json
 import re
 import warnings
+from pathlib import Path
+
+import pytest
 
 from skrub import TableReport, ToDatetime
 from skrub import _dataframe as sbd
@@ -123,6 +127,57 @@ def test_duration(df_module):
     assert re.search(r"2(\.0)?\s+days", TableReport(df).html())
 
 
+@pytest.mark.parametrize(
+    "filename_type",
+    ["str", "Path", "text_file_object", "binary_file_object"],
+)
+def test_write_html(tmp_path, pd_module, filename_type):
+    """Check that ``write_html`` accepts paths and text/binary file objects."""
+    df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]})
+    report = TableReport(df)
+    tmp_file_path = tmp_path / Path("report.html")
+
+    # The ExitStack closes whatever file object we open; the two path-like
+    # cases register nothing on it, so leaving the block is then a no-op.
+    with contextlib.ExitStack() as stack:
+        if filename_type == "text_file_object":
+            target = stack.enter_context(open(tmp_file_path, "w", encoding="utf-8"))
+        elif filename_type == "binary_file_object":
+            target = stack.enter_context(open(tmp_file_path, "wb"))
+        elif filename_type == "str":
+            target = str(tmp_file_path)
+        else:
+            target = tmp_file_path
+
+        report.write_html(target)
+        assert tmp_file_path.exists()
+
+    # Read back only after the stack has closed (and flushed) any file object.
+    saved_content = tmp_file_path.read_text(encoding="utf-8")
+    assert "</html>" in saved_content
+
+
+def test_write_html_with_not_utf8_encoding(tmp_path, pd_module):
+    """Check that a text file object that is not UTF-8 encoded is rejected."""
+    df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]})
+    report = TableReport(df)
+    tmp_file_path = tmp_path / Path("report.html")
+
+    with open(tmp_file_path, "w", encoding="latin-1") as stream:
+        # Build the expected message from the encoding the stream reports.
+        encoding = getattr(stream, "encoding", None)
+        expected_message = (
+            "If `file` is a text file it should use utf-8 encoding; got:"
+            f" {encoding!r}"
+        )
+        with pytest.raises(ValueError, match=expected_message):
+            report.write_html(stream)
+
+    # Nothing may have reached the file before the error was raised.
+    saved_content = tmp_file_path.read_text(encoding="latin-1")
+    assert "</html>" not in saved_content
+
+
 def test_verbosity_parameter(df_module, capsys):
     df = df_module.make_dataframe(
         dict(