Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Route array representation for HTML #1206

Merged
merged 16 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1541,7 +1541,7 @@

array_info_dict = get_basic_array_info(dataset)
if isinstance(dataset, h5py.Dataset):

dataset_type = "HDF5 dataset"
# get info from hdf5 dataset
compressed_size = dataset.id.get_storage_size()
if hasattr(dataset, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0
Expand All @@ -1556,10 +1556,13 @@
"Compression opts": dataset.compression_opts,
"Compression ratio": compression_ratio,
}

array_info_dict.update(hdf5_info_dict)

# generate html repr
repr_html = generate_array_html_repr(array_info_dict, dataset, "HDF5 dataset")
elif isinstance(dataset, np.ndarray):
dataset_type = "NumPy array"

Check warning on line 1562 in src/hdmf/backends/hdf5/h5tools.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/backends/hdf5/h5tools.py#L1562

Added line #L1562 was not covered by tests
else:
dataset_type = dataset.__class__.__name__

Check warning on line 1564 in src/hdmf/backends/hdf5/h5tools.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/backends/hdf5/h5tools.py#L1564

Added line #L1564 was not covered by tests

repr_html = generate_array_html_repr(array_info_dict, dataset, dataset_type)

return repr_html
32 changes: 25 additions & 7 deletions src/hdmf/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,8 +707,11 @@
return f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"' \
f' title="{access_code}">{key}: </span><span class="field-value">{value}</span></div>'

is_array_data = isinstance(value, (np.ndarray, h5py.Dataset, DataIO)) or \
(hasattr(value, "store") and hasattr(value, "shape")) # Duck typing for zarr array
# Detects array-like objects that conform to the Array Interface specification
# (e.g., NumPy arrays, HDF5 datasets, DataIO objects). Objects must have both
# 'shape' and 'dtype' attributes. Iterators are excluded as they lack 'shape'.
# This approach keeps the implementation generic, without coupling it to backend-specific methods.
is_array_data = hasattr(value, "shape") and hasattr(value, "dtype")

if is_array_data:
html_content = self._generate_array_html(value, level + 1)
Expand All @@ -735,14 +738,29 @@


def _generate_array_html(self, array, level):
"""Generates HTML for array data"""
"""Generates HTML for array data (e.g., NumPy arrays, HDF5 datasets, Zarr datasets and DataIO objects)."""

read_io = self.get_read_io() # if the Container was read from file, get IO object
if read_io is not None: # Note that sometimes numpy array have a read_io attribute
repr_html = read_io.generate_dataset_html(array)
else:
is_numpy_array = isinstance(array, np.ndarray)
it_was_read_with_io = self.get_read_io() is not None
oruebel marked this conversation as resolved.
Show resolved Hide resolved
h-mayorquin marked this conversation as resolved.
Show resolved Hide resolved
is_data_io = isinstance(array, DataIO)

if is_numpy_array:
array_info_dict = get_basic_array_info(array)
repr_html = generate_array_html_repr(array_info_dict, array, "NumPy array")
elif is_data_io:
array_info_dict = get_basic_array_info(array.data)
repr_html = generate_array_html_repr(array_info_dict, array.data, "DataIO")

Check warning on line 752 in src/hdmf/container.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/container.py#L751-L752

Added lines #L751 - L752 were not covered by tests
elif it_was_read_with_io:
# The backend handles the representation here. Two special cases worth noting:
# 1. Array-type attributes (e.g., start_frame in ImageSeries) remain NumPy arrays
# even when their parent container has an IO
# 2. Data may have been modified after being read from storage
read_io = self.get_read_io()
h-mayorquin marked this conversation as resolved.
Show resolved Hide resolved
repr_html = read_io.generate_dataset_html(array)
else: # Not sure which object could get here
oruebel marked this conversation as resolved.
Show resolved Hide resolved
object_class = array.__class__.__name__
array_info_dict = get_basic_array_info(array.data)
repr_html = generate_array_html_repr(array_info_dict, array.data, object_class)

Check warning on line 763 in src/hdmf/container.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/container.py#L761-L763

Added lines #L761 - L763 were not covered by tests

return f'<div style="margin-left: {level * 20}px;" class="container-fields">{repr_html}</div>'

Expand Down
20 changes: 14 additions & 6 deletions src/hdmf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@

return basic_array_info_dict

def generate_array_html_repr(backend_info_dict, array, dataset_type=None):
def generate_array_html_repr(array_info_dict, array, dataset_type=None):
def html_table(item_dicts) -> str:
"""
Generates an html table from a dictionary
Expand All @@ -912,14 +912,22 @@
report += "</table>"
return report

array_info_html = html_table(backend_info_dict)
array_info_html = html_table(array_info_dict)
repr_html = dataset_type + "<br>" + array_info_html if dataset_type is not None else array_info_html

if hasattr(array, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0
array_size = array.nbytes
# Array-like objects might lack nbytes (h5py < 3.0) or size (DataIO object)
if hasattr(array, "nbytes"):
array_size_bytes = array.nbytes
else:
array_size = array.size * array.dtype.itemsize
array_is_small = array_size < 1024 * 0.1 # 10 % a kilobyte to display the array
if hasattr(array, "size"):
array_size = array.size

Check warning on line 923 in src/hdmf/utils.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/utils.py#L923

Added line #L923 was not covered by tests
else:
import math
array_size = math.prod(array.shape)
array_size_bytes = array_size * array.dtype.itemsize

Check warning on line 927 in src/hdmf/utils.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/utils.py#L925-L927

Added lines #L925 - L927 were not covered by tests

# Heuristic for displaying data
array_is_small = array_size_bytes < 1024 * 0.1 # 10 % a kilobyte to display the array
if array_is_small:
repr_html += "<br>" + str(np.asarray(array))

Expand Down
Loading