-
Notifications
You must be signed in to change notification settings - Fork 105
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH adding write_html
to TableReport
#1190
base: main
Are you sure you want to change the base?
Changes from all commits
e8dfaf6
58b8a65
048cec5
15c7b06
0debd0b
260d710
684a9c1
597bc0e
ef50a50
bbb50d9
f3c1e8b
760b7b3
0789ac5
911eb5a
036db59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,11 @@ | ||
import contextlib | ||
import datetime | ||
import json | ||
import re | ||
import warnings | ||
from pathlib import Path | ||
|
||
import pytest | ||
|
||
from skrub import TableReport, ToDatetime | ||
from skrub import _dataframe as sbd | ||
|
@@ -123,6 +127,57 @@ def test_duration(df_module): | |
assert re.search(r"2(\.0)?\s+days", TableReport(df).html()) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"filename_type", | ||
["str", "Path", "text_file_object", "binary_file_object"], | ||
) | ||
def test_write_html(tmp_path, pd_module, filename_type): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this test is looking great! the last thing we need to take care of is to make sure we close the file if we opened it (otherwise we "leak" a resource: the opened file handle. that could be a problem for example to clean up the temp directory on windows as it refuses to remove files that have an open file handle). Usually we ensure that with a simple context manager like: with open(tmp_file_path, 'w', encoding='utf-8') as file:
file.write('hello') but here we have a tricky situation because in some cases we have a string or path (which require no closing) and sometimes we have a file object which does require closing. The standard library module with contextlib.ExitStack() as stack:
if file_type == 'str':
file = str(tmp_file_path)
elif file_type == 'text_file_object':
file = stack.enter_context(open(tmp_file_path, 'w', encoding='utf-8'))
# ...
report.write_html(file)
# if we opened it the file is closed here when we exit the `with` block This option using ExitStack is my favorite because the file is being managed by a context manager as soon as it is opened. Another way is to use if file_type == 'str':
file = contextlib.nullcontext(str(tmp_file_path))
elif file_type == 'text_file_object':
file = open(tmp_file_path, 'w', encoding='utf-8')
with file:
report.write_html(file) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wow contextlib.ExitStack() is very nice There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe you could add a comment on L139 to explain why it's a good idea to use contextlib here? |
||
df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) | ||
report = TableReport(df) | ||
|
||
tmp_file_path = tmp_path / Path("report.html") | ||
|
||
# making sure we are closing the open files, and dealing with the first | ||
# condition which doesn't require opening any file | ||
with contextlib.ExitStack() as stack: | ||
if filename_type == "str": | ||
filename = str(tmp_file_path) | ||
elif filename_type == "text_file_object": | ||
filename = stack.enter_context(open(tmp_file_path, "w", encoding="utf-8")) | ||
elif filename_type == "binary_file_object": | ||
filename = stack.enter_context(open(tmp_file_path, "wb")) | ||
else: | ||
filename = tmp_file_path | ||
|
||
report.write_html(filename) | ||
assert tmp_file_path.exists() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we don't need to check the full content but maybe we could see that the file contains |
||
|
||
with open(tmp_file_path, "r", encoding="utf-8") as file: | ||
saved_content = file.read() | ||
assert "</html>" in saved_content | ||
|
||
|
||
def test_write_html_with_not_utf8_encoding(tmp_path, pd_module): | ||
df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) | ||
report = TableReport(df) | ||
tmp_file_path = tmp_path / Path("report.html") | ||
|
||
with open(tmp_file_path, "w", encoding="latin-1") as file: | ||
encoding = getattr(file, "encoding", None) | ||
with pytest.raises( | ||
ValueError, | ||
match=( | ||
"If `file` is a text file it should use utf-8 encoding; got:" | ||
f" {encoding!r}" | ||
), | ||
): | ||
report.write_html(file) | ||
|
||
with open(tmp_file_path, "r", encoding="latin-1") as file: | ||
saved_content = file.read() | ||
assert "</html>" not in saved_content | ||
|
||
|
||
def test_verbosity_parameter(df_module, capsys): | ||
df = df_module.make_dataframe( | ||
dict( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a comment explaining what
html
is expected (or not expected) to be at line 230?Additionally, light inline documentation/comments on the steps above would help readability :)