Skip to content

Commit

Permalink
Merge branch 'dev' into cov
Browse files Browse the repository at this point in the history
  • Loading branch information
mavaylon1 authored Nov 13, 2024
2 parents 146f7e4 + 8c3eecb commit f0400f8
Show file tree
Hide file tree
Showing 14 changed files with 287 additions and 19 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# NOTE: run `pre-commit autoupdate` to update hooks to latest version
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
Expand All @@ -18,7 +18,7 @@ repos:
# hooks:
# - id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.8
rev: v0.7.3
hooks:
- id: ruff
# - repo: https://github.com/econchick/interrogate
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

### Enhancements
- Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188)
- Improved html representation of data in `Container` objects. @h-mayorquin [#1100](https://github.com/hdmf-dev/hdmf/pull/1100)
- Added error when using colon for `Container` name. A colon cannot be used as a group name when writing to Zarr on Windows. @stephprince [#1202](https://github.com/hdmf-dev/hdmf/pull/1202)

### Bug fixes
- Fixed inaccurate error message when validating reference data types. @stephprince [#1199](https://github.com/hdmf-dev/hdmf/pull/1199)
- Fixed incorrect dtype conversion of a StrDataset. @stephprince [#1205](https://github.com/hdmf-dev/hdmf/pull/1205)

## HDMF 3.14.5 (October 6, 2024)

Expand Down
2 changes: 2 additions & 0 deletions docs/gallery/plot_term_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@
For more information how to properly format the schema to support LinkML Dynamic Enumerations, please
refer to https://linkml.io/linkml/schemas/enums.html#dynamic-enums.
"""
# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_termset.png'

from hdmf.common import DynamicTable, VectorData
import os
import numpy as np
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/source/figures/gallery_thumbnails.pptx
Binary file not shown.
32 changes: 31 additions & 1 deletion src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
from ...container import Container
from ...data_utils import AbstractDataChunkIterator
from ...spec import RefSpec, DtypeSpec, NamespaceCatalog
from ...utils import docval, getargs, popargs, get_data_shape, get_docval, StrDataset
from ...utils import (docval, getargs, popargs, get_data_shape, get_docval, StrDataset,
get_basic_array_info, generate_array_html_repr)
from ..utils import NamespaceToBuilderHelper, WriteStatusTracker

ROOT_NAME = 'root'
Expand Down Expand Up @@ -1539,3 +1540,32 @@ def set_dataio(cls, **kwargs):
data = H5DataIO(data)
"""
return H5DataIO.__init__(**kwargs)

@staticmethod
def generate_dataset_html(dataset):
"""Generates an html representation for a dataset for the HDF5IO class"""

array_info_dict = get_basic_array_info(dataset)
if isinstance(dataset, h5py.Dataset):

# get info from hdf5 dataset
compressed_size = dataset.id.get_storage_size()
if hasattr(dataset, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0
uncompressed_size = dataset.nbytes
else:
uncompressed_size = dataset.size * dataset.dtype.itemsize
compression_ratio = uncompressed_size / compressed_size if compressed_size != 0 else "undefined"

hdf5_info_dict = {
"Chunk shape": dataset.chunks,
"Compression": dataset.compression,
"Compression opts": dataset.compression_opts,
"Compression ratio": compression_ratio,
}

array_info_dict.update(hdf5_info_dict)

# generate html repr
repr_html = generate_array_html_repr(array_info_dict, dataset, "HDF5 dataset")

return repr_html
10 changes: 9 additions & 1 deletion src/hdmf/backends/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from ..build import BuildManager, GroupBuilder
from ..container import Container, HERDManager
from .errors import UnsupportedOperation
from ..utils import docval, getargs, popargs
from ..utils import docval, getargs, popargs, get_basic_array_info, generate_array_html_repr
from warnings import warn


Expand Down Expand Up @@ -188,6 +188,14 @@ def close(self):
''' Close this HDMFIO object to further reading/writing'''
pass

@staticmethod
def generate_dataset_html(dataset):
"""Generates an html representation for a dataset"""
array_info_dict = get_basic_array_info(dataset)
repr_html = generate_array_html_repr(array_info_dict, dataset)

return repr_html

def __enter__(self):
return self

Expand Down
7 changes: 5 additions & 2 deletions src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ..query import ReferenceResolver
from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec
from ..spec.spec import BaseStorageSpec
from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape
from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape, StrDataset

_const_arg = '__constructor_arg'

Expand Down Expand Up @@ -212,7 +212,10 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901
if (isinstance(value, np.ndarray) or
(hasattr(value, 'astype') and hasattr(value, 'dtype'))):
if spec_dtype_type is _unicode:
ret = value.astype('U')
if isinstance(value, StrDataset):
ret = value
else:
ret = value.astype('U')
ret_dtype = "utf8"
elif spec_dtype_type is _ascii:
ret = value.astype('S')
Expand Down
36 changes: 24 additions & 12 deletions src/hdmf/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
import pandas as pd

from .data_utils import DataIO, append_data, extend_data, AbstractDataChunkIterator
from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict
from .utils import (docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict,
get_basic_array_info, generate_array_html_repr)

from .term_set import TermSet, TermSetWrapper

Expand Down Expand Up @@ -302,8 +303,8 @@ def __new__(cls, *args, **kwargs):
@docval({'name': 'name', 'type': str, 'doc': 'the name of this container'})
def __init__(self, **kwargs):
name = getargs('name', kwargs)
if '/' in name:
raise ValueError("name '" + name + "' cannot contain '/'")
if ('/' in name or ':' in name) and not self._in_construct_mode:
raise ValueError(f"name '{name}' cannot contain a '/' or ':'")
self.__name = name
self.__field_values = dict()
self.__read_io = None
Expand Down Expand Up @@ -707,8 +708,6 @@ def _generate_html_repr(self, fields, level=0, access_code="", is_field=False):
for index, item in enumerate(fields):
access_code += f'[{index}]'
html_repr += self._generate_field_html(index, item, level, access_code)
elif isinstance(fields, np.ndarray):
html_repr += self._generate_array_html(fields, level)
else:
pass

Expand All @@ -724,18 +723,23 @@ def _generate_field_html(self, key, value, level, access_code):
return f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"' \
f' title="{access_code}">{key}: </span><span class="field-value">{value}</span></div>'

if hasattr(value, "generate_html_repr"):
html_content = value.generate_html_repr(level + 1, access_code)
is_array_data = isinstance(value, (np.ndarray, h5py.Dataset, DataIO)) or \
(hasattr(value, "store") and hasattr(value, "shape")) # Duck typing for zarr array

if is_array_data:
html_content = self._generate_array_html(value, level + 1)
elif hasattr(value, "generate_html_repr"):
html_content = value.generate_html_repr(level + 1, access_code)
elif hasattr(value, '__repr_html__'):
html_content = value.__repr_html__()

elif hasattr(value, "fields"):
elif hasattr(value, "fields"): # Note that h5py.Dataset has a fields attribute so there is an implicit order
html_content = self._generate_html_repr(value.fields, level + 1, access_code, is_field=True)
elif isinstance(value, (list, dict, np.ndarray)):
html_content = self._generate_html_repr(value, level + 1, access_code, is_field=False)
else:
html_content = f'<span class="field-key">{value}</span>'


html_repr = (
f'<details><summary style="display: list-item; margin-left: {level * 20}px;" '
f'class="container-fields field-key" title="{access_code}"><b>{key}</b></summary>'
Expand All @@ -745,10 +749,18 @@ def _generate_field_html(self, key, value, level, access_code):

return html_repr


def _generate_array_html(self, array, level):
"""Generates HTML for a NumPy array."""
str_ = str(array).replace("\n", "</br>")
return f'<div style="margin-left: {level * 20}px;" class="container-fields">{str_}</div>'
"""Generates HTML for array data"""

read_io = self.get_read_io() # if the Container was read from file, get IO object
if read_io is not None: # Note that sometimes numpy array have a read_io attribute
repr_html = read_io.generate_dataset_html(array)
else:
array_info_dict = get_basic_array_info(array)
repr_html = generate_array_html_repr(array_info_dict, array, "NumPy array")

return f'<div style="margin-left: {level * 20}px;" class="container-fields">{repr_html}</div>'

@staticmethod
def __smart_str(v, num_indent):
Expand Down
48 changes: 48 additions & 0 deletions src/hdmf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,54 @@ def is_ragged(data):

return False

def get_basic_array_info(array):
    """Return a dict with basic info (dtype, shape, human-readable size) for an array-like object."""

    def _human_readable_size(num_bytes):
        # Walk up binary (1024-based) units until the value fits the unit.
        units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
        index = 0
        while num_bytes >= 1024 and index < len(units) - 1:
            num_bytes /= 1024.
            index += 1
        return f"{num_bytes:.2f} {units[index]}"

    # TODO: Remove this after h5py minimal version is larger than 3.0
    if hasattr(array, "nbytes"):
        size_in_bytes = array.nbytes
    else:
        size_in_bytes = array.size * array.dtype.itemsize

    return {
        "Data type": array.dtype,
        "Shape": array.shape,
        "Array size": _human_readable_size(size_in_bytes),
    }

def generate_array_html_repr(backend_info_dict, array, dataset_type=None):
    """Build an html representation of an array: an info table plus, for tiny arrays, the values."""

    def _render_table(info):
        # One <tr> per key/value pair, wrapped in the project's data-info table.
        rows = "".join(
            f"<tr>"
            f'<th style="text-align: left">{key}</th>'
            f'<td style="text-align: left">{value}</td>'
            f"</tr>"
            for key, value in info.items()
        )
        return f'<table class="data-info"><tbody>{rows}</tbody></table>'

    table_html = _render_table(backend_info_dict)
    if dataset_type is not None:
        repr_html = dataset_type + "<br>" + table_html
    else:
        repr_html = table_html

    # TODO: Remove this after h5py minimal version is larger than 3.0
    if hasattr(array, "nbytes"):
        array_size = array.nbytes
    else:
        array_size = array.size * array.dtype.itemsize

    # Only inline raw values for very small arrays (under 10% of a kibibyte).
    if array_size < 1024 * 0.1:
        repr_html += "<br>" + str(np.asarray(array))

    return repr_html

class LabelledDict(dict):
"""A dict wrapper that allows querying by an attribute of the values and running a callable on removed items.
Expand Down
6 changes: 5 additions & 1 deletion src/hdmf/validate/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,11 @@ def validate(self, **kwargs):
try:
dtype, string_format = get_type(data, builder.dtype)
if not check_type(self.spec.dtype, dtype, string_format):
ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype,
if isinstance(self.spec.dtype, RefSpec):
expected = f'{self.spec.dtype.reftype} reference'
else:
expected = self.spec.dtype
ret.append(DtypeError(self.get_spec_loc(self.spec), expected, dtype,
location=self.get_builder_loc(builder)))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
Expand Down
18 changes: 18 additions & 0 deletions tests/unit/build_tests/test_convert_dtype.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from datetime import datetime, date

import numpy as np
import h5py
import unittest

from hdmf.backends.hdf5 import H5DataIO
from hdmf.build import ObjectMapper
from hdmf.data_utils import DataChunkIterator
from hdmf.spec import DatasetSpec, RefSpec, DtypeSpec
from hdmf.testing import TestCase
from hdmf.utils import StrDataset

H5PY_3 = h5py.__version__.startswith('3')

class TestConvertDtype(TestCase):

Expand Down Expand Up @@ -321,6 +326,19 @@ def test_text_spec(self):
self.assertIs(ret, value)
self.assertEqual(ret_dtype, 'utf8')

@unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+")
def test_text_spec_str_dataset(self):
text_spec_types = ['text', 'utf', 'utf8', 'utf-8']
for spec_type in text_spec_types:
with self.subTest(spec_type=spec_type):
with h5py.File("test.h5", "w", driver="core", backing_store=False) as f:
spec = DatasetSpec('an example dataset', spec_type, name='data')

value = StrDataset(f.create_dataset('data', data=['a', 'b', 'c']), None)
ret, ret_dtype = ObjectMapper.convert_dtype(spec, value) # no conversion
self.assertIs(ret, value)
self.assertEqual(ret_dtype, 'utf8')

def test_ascii_spec(self):
ascii_spec_types = ['ascii', 'bytes']
for spec_type in ascii_spec_types:
Expand Down
Loading

0 comments on commit f0400f8

Please sign in to comment.