Skip to content

Commit

Permalink
Merge branch 'dev' into prepare_4.0.0_again
Browse files Browse the repository at this point in the history
  • Loading branch information
rly authored Jan 22, 2025
2 parents 3a0bca0 + ab8cf3b commit b2e5d75
Show file tree
Hide file tree
Showing 11 changed files with 126 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run_coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
- name: Install package with optional dependencies
if: ${{ matrix.opt_req }}
run: |
python -m pip install ".[test,tqdm,zarr,termset]"
python -m pip install ".[test,tqdm,sparse,zarr,termset]"
- name: Run tests and generate coverage report
run: |
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ repos:
# hooks:
# - id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
rev: v0.9.2
hooks:
- id: ruff
# - repo: https://github.com/econchick/interrogate
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ formats: all
# Optionally set the version of Python and requirements required to build your docs
python:
install:
- path: .[docs,tqdm,zarr,termset] # path to the package relative to the root
- path: .[docs,tqdm,sparse,zarr,termset] # path to the package relative to the root

# Optionally include all submodules
submodules:
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Importing from `hdmf.build.map` is no longer supported. Import from `hdmf.build` instead. @rly [#1221](https://github.com/hdmf-dev/hdmf/pull/1221)
- Python 3.8 has reached end of life. Dropped support for Python 3.8 and add support for Python 3.13. @mavaylon1 [#1209](https://github.com/hdmf-dev/hdmf/pull/1209)
- Support for Zarr is limited to versions < 3. @rly [#1229](https://github.com/hdmf-dev/hdmf/pull/1229)
- Scipy is no longer a required dependency. Users using the `CSRMatrix` data type should install `scipy` separately or with `pip install "hdmf[sparse]"`. @rly [#1140](https://github.com/hdmf-dev/hdmf/pull/1140)

### Changed
- Added checks to ensure that group and dataset spec names and default names do not contain slashes. @bendichter [#1219](https://github.com/hdmf-dev/hdmf/pull/1219)
Expand All @@ -19,6 +20,9 @@
### Added
- Added script to check Python version support for HDMF dependencies. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230)

### Fixed
- Fixed issue with `DynamicTable.add_column` not allowing subclasses of `DynamicTableRegion` or `EnumData`. @rly [#1091](https://github.com/hdmf-dev/hdmf/pull/1091)

## HDMF 3.14.6 (December 20, 2024)

### Enhancements
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ dependencies = [
'numpy>=1.19.3',
"pandas>=1.2.0",
"ruamel.yaml>=0.16",
"scipy>=1.7",
]
dynamic = ["version"]

[project.optional-dependencies]
tqdm = ["tqdm>=4.41.0"]
zarr = ["zarr>=2.12.0,<3"]
sparse = ["scipy>=1.7"]
termset = [
"linkml-runtime>=1.5.5",
"schemasheets>=0.1.23",
Expand Down Expand Up @@ -70,7 +70,7 @@ docs = [
]

# all possible dependencies
all = ["hdmf[tqdm,zarr,termset,test,docs]"]
all = ["hdmf[tqdm,zarr,sparse,termset,test,docs]"]

[project.urls]
"Homepage" = "https://github.com/hdmf-dev/hdmf"
Expand Down
3 changes: 1 addition & 2 deletions src/hdmf/common/io/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,11 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i
required=field_spec.required
)
dtype = cls._get_type(field_spec, type_map)
column_conf['class'] = dtype
if issubclass(dtype, DynamicTableRegion):
# the spec does not know which table this DTR points to
# the user must specify the table attribute on the DTR after it is generated
column_conf['table'] = True
else:
column_conf['class'] = dtype

index_counter = 0
index_name = attr_name
Expand Down
22 changes: 17 additions & 5 deletions src/hdmf/common/sparse.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
import scipy.sparse as sps
try:
from scipy.sparse import csr_matrix
SCIPY_INSTALLED = True
except ImportError:
SCIPY_INSTALLED = False
class csr_matrix: # dummy class to prevent import errors
pass

from . import register_class
from ..container import Container
from ..utils import docval, popargs, to_uint_array, get_data_shape, AllowPositional
Expand All @@ -7,7 +14,7 @@
@register_class('CSRMatrix')
class CSRMatrix(Container):

@docval({'name': 'data', 'type': (sps.csr_matrix, 'array_data'),
@docval({'name': 'data', 'type': (csr_matrix, 'array_data'),
'doc': 'the data to use for this CSRMatrix or CSR data array.'
'If passing CSR data array, *indices*, *indptr*, and *shape* must also be provided'},
{'name': 'indices', 'type': 'array_data', 'doc': 'CSR index array', 'default': None},
Expand All @@ -16,13 +23,17 @@ class CSRMatrix(Container):
{'name': 'name', 'type': str, 'doc': 'the name to use for this when storing', 'default': 'csr_matrix'},
allow_positional=AllowPositional.WARNING)
def __init__(self, **kwargs):
if not SCIPY_INSTALLED:
raise ImportError(
"scipy must be installed to use CSRMatrix. Please install scipy using `pip install scipy`."
)
data, indices, indptr, shape = popargs('data', 'indices', 'indptr', 'shape', kwargs)
super().__init__(**kwargs)
if not isinstance(data, sps.csr_matrix):
if not isinstance(data, csr_matrix):
temp_shape = get_data_shape(data)
temp_ndim = len(temp_shape)
if temp_ndim == 2:
data = sps.csr_matrix(data)
data = csr_matrix(data)
elif temp_ndim == 1:
if any(_ is None for _ in (indptr, indices, shape)):
raise ValueError("Must specify 'indptr', 'indices', and 'shape' arguments when passing data array.")
Expand All @@ -31,9 +42,10 @@ def __init__(self, **kwargs):
shape = self.__check_arr(shape, 'shape')
if len(shape) != 2:
raise ValueError("'shape' argument must specify two and only two dimensions.")
data = sps.csr_matrix((data, indices, indptr), shape=shape)
data = csr_matrix((data, indices, indptr), shape=shape)
else:
raise ValueError("'data' argument cannot be ndarray of dimensionality > 2.")
# self.__data is a scipy.sparse.csr_matrix
self.__data = data

@staticmethod
Expand Down
51 changes: 32 additions & 19 deletions src/hdmf/common/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ def _init_class_columns(self):
description=col['description'],
index=col.get('index', False),
table=col.get('table', False),
col_cls=col.get('class', VectorData),
col_cls=col.get('class'),
# Pass through extra kwargs for add_column that subclasses may have added
**{k: col[k] for k in col.keys()
if k not in DynamicTable.__reserved_colspec_keys})
Expand Down Expand Up @@ -564,10 +564,13 @@ def _set_dtr_targets(self, target_tables: dict):
if not column_conf.get('table', False):
raise ValueError("Column '%s' must be a DynamicTableRegion to have a target table."
% colname)
self.add_column(name=column_conf['name'],
description=column_conf['description'],
index=column_conf.get('index', False),
table=True)
self.add_column(
name=column_conf['name'],
description=column_conf['description'],
index=column_conf.get('index', False),
table=True,
col_cls=column_conf.get('class'),
)
if isinstance(self[colname], VectorIndex):
col = self[colname].target
else:
Expand Down Expand Up @@ -681,7 +684,7 @@ def add_row(self, **kwargs):
index=col.get('index', False),
table=col.get('table', False),
enum=col.get('enum', False),
col_cls=col.get('class', VectorData),
col_cls=col.get('class'),
# Pass through extra keyword arguments for add_column that
# subclasses may have added
**{k: col[k] for k in col.keys()
Expand Down Expand Up @@ -753,7 +756,7 @@ def __eq__(self, other):
'default': False},
{'name': 'enum', 'type': (bool, 'array_data'), 'default': False,
'doc': ('whether or not this column contains data from a fixed set of elements')},
{'name': 'col_cls', 'type': type, 'default': VectorData,
{'name': 'col_cls', 'type': type, 'default': None,
'doc': ('class to use to represent the column data. If table=True, this field is ignored and a '
'DynamicTableRegion object is used. If enum=True, this field is ignored and a EnumData '
'object is used.')},
Expand Down Expand Up @@ -805,29 +808,39 @@ def add_column(self, **kwargs): # noqa: C901
% (name, self.__class__.__name__, spec_index))
warn(msg, stacklevel=3)

spec_col_cls = self.__uninit_cols[name].get('class', VectorData)
if col_cls != spec_col_cls:
msg = ("Column '%s' is predefined in %s with class=%s which does not match the entered "
"col_cls argument. The predefined class spec will be ignored. "
"Please ensure the new column complies with the spec. "
"This will raise an error in a future version of HDMF."
% (name, self.__class__.__name__, spec_col_cls))
warn(msg, stacklevel=2)

ckwargs = dict(kwargs)

# Add table if it's been specified
if table and enum:
raise ValueError("column '%s' cannot be both a table region "
"and come from an enumerable set of elements" % name)
# Update col_cls if table is specified
if table is not False:
col_cls = DynamicTableRegion
if col_cls is None:
col_cls = DynamicTableRegion
if isinstance(table, DynamicTable):
ckwargs['table'] = table
# Update col_cls if enum is specified
if enum is not False:
col_cls = EnumData
if col_cls is None:
col_cls = EnumData
if isinstance(enum, (list, tuple, np.ndarray, VectorData)):
ckwargs['elements'] = enum
# Update col_cls to the default VectorData if col_cls is None
if col_cls is None:
col_cls = VectorData

if name in self.__uninit_cols: # column is a predefined optional column from the spec
# check the given values against the predefined optional column spec. if they do not match, raise a warning
# and ignore the given arguments. users should not be able to override these values
spec_col_cls = self.__uninit_cols[name].get('class')
if spec_col_cls is not None and col_cls != spec_col_cls:
msg = ("Column '%s' is predefined in %s with class=%s which does not match the entered "
"col_cls argument. The predefined class spec will be ignored. "
"Please ensure the new column complies with the spec. "
"This will raise an error in a future version of HDMF."
% (name, self.__class__.__name__, spec_col_cls))
warn(msg, stacklevel=2)

# If the user provided a list of lists that needs to be indexed, then we now need to flatten the data
# We can only create the index actual VectorIndex once we have the VectorData column so we compute
Expand Down Expand Up @@ -873,7 +886,7 @@ def add_column(self, **kwargs): # noqa: C901
if col in self.__uninit_cols:
self.__uninit_cols.pop(col)

if col_cls is EnumData:
if issubclass(col_cls, EnumData):
columns.append(col.elements)
col.elements.parent = self

Expand Down
49 changes: 47 additions & 2 deletions tests/unit/common/test_generate_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
class TestDynamicDynamicTable(TestCase):

def setUp(self):

self.dtr_spec = DatasetSpec(
data_type_def='CustomDTR',
data_type_inc='DynamicTableRegion',
doc='a test DynamicTableRegion column', # this is overridden where it is used
)

self.dt_spec = GroupSpec(
'A test extension that contains a dynamic table',
data_type_def='TestTable',
Expand Down Expand Up @@ -99,14 +106,21 @@ def setUp(self):
doc='a test column',
dtype='float',
quantity='?',
)
),
DatasetSpec(
data_type_inc='CustomDTR',
name='optional_custom_dtr_col',
doc='a test DynamicTableRegion column',
quantity='?'
),
]
)

from hdmf.spec.write import YAMLSpecWriter
writer = YAMLSpecWriter(outdir='.')

self.spec_catalog = SpecCatalog()
self.spec_catalog.register_spec(self.dtr_spec, 'test.yaml')
self.spec_catalog.register_spec(self.dt_spec, 'test.yaml')
self.spec_catalog.register_spec(self.dt_spec2, 'test.yaml')
self.namespace = SpecNamespace(
Expand All @@ -124,7 +138,7 @@ def setUp(self):
self.test_dir = tempfile.mkdtemp()
spec_fpath = os.path.join(self.test_dir, 'test.yaml')
namespace_fpath = os.path.join(self.test_dir, 'test-namespace.yaml')
writer.write_spec(dict(groups=[self.dt_spec, self.dt_spec2]), spec_fpath)
writer.write_spec(dict(datasets=[self.dtr_spec], groups=[self.dt_spec, self.dt_spec2]), spec_fpath)
writer.write_namespace(self.namespace, namespace_fpath)
self.namespace_catalog = NamespaceCatalog()
hdmf_typemap = get_type_map()
Expand All @@ -133,6 +147,7 @@ def setUp(self):
self.type_map.load_namespaces(namespace_fpath)
self.manager = BuildManager(self.type_map)

self.CustomDTR = self.type_map.get_dt_container_cls('CustomDTR', CORE_NAMESPACE)
self.TestTable = self.type_map.get_dt_container_cls('TestTable', CORE_NAMESPACE)
self.TestDTRTable = self.type_map.get_dt_container_cls('TestDTRTable', CORE_NAMESPACE)

Expand Down Expand Up @@ -228,6 +243,22 @@ def test_dynamic_table_region_non_dtr_target(self):
self.TestDTRTable(name='test_dtr_table', description='my table',
target_tables={'optional_col3': test_table})

def test_custom_dtr_class(self):
test_table = self.TestTable(name='test_table', description='my test table')
test_table.add_row(my_col=3.0, indexed_col=[1.0, 3.0], optional_col2=.5)
test_table.add_row(my_col=4.0, indexed_col=[2.0, 4.0], optional_col2=.5)

test_dtr_table = self.TestDTRTable(name='test_dtr_table', description='my table',
target_tables={'optional_custom_dtr_col': test_table})

self.assertIsInstance(test_dtr_table['optional_custom_dtr_col'], self.CustomDTR)
self.assertEqual(test_dtr_table['optional_custom_dtr_col'].description, "a test DynamicTableRegion column")
self.assertIs(test_dtr_table['optional_custom_dtr_col'].table, test_table)

test_dtr_table.add_row(ref_col=0, indexed_ref_col=[0, 1], optional_custom_dtr_col=0)
test_dtr_table.add_row(ref_col=0, indexed_ref_col=[0, 1], optional_custom_dtr_col=1)
self.assertEqual(test_dtr_table['optional_custom_dtr_col'].data, [0, 1])

def test_attribute(self):
test_table = self.TestTable(name='test_table', description='my test table')
assert test_table.my_col is not None
Expand Down Expand Up @@ -266,3 +297,17 @@ def test_roundtrip(self):
for err in errors:
raise Exception(err)
self.reader.close()

def test_add_custom_dtr_column(self):
test_table = self.TestTable(name='test_table', description='my test table')
test_table.add_column(
name='custom_dtr_column',
description='this is a custom DynamicTableRegion column',
col_cls=self.CustomDTR,
)
self.assertIsInstance(test_table['custom_dtr_column'], self.CustomDTR)
self.assertEqual(test_table['custom_dtr_column'].description, 'this is a custom DynamicTableRegion column')

test_table.add_row(my_col=3.0, indexed_col=[1.0, 3.0], custom_dtr_column=0)
test_table.add_row(my_col=4.0, indexed_col=[2.0, 4.0], custom_dtr_column=1)
self.assertEqual(test_table['custom_dtr_column'].data, [0, 1])
22 changes: 19 additions & 3 deletions tests/unit/common/test_sparse.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
from hdmf.common import CSRMatrix
from hdmf.testing import TestCase, H5RoundTripMixin

import scipy.sparse as sps
import numpy as np
import unittest

try:
import scipy.sparse as sps
SCIPY_INSTALLED = True
except ImportError:
SCIPY_INSTALLED = False


@unittest.skipIf(SCIPY_INSTALLED, "scipy is installed")
class TestCSRMatrixNoScipy(TestCase):

def test_import_error(self):
data = np.array([[1, 0, 2], [0, 0, 3], [4, 5, 6]])
with self.assertRaises(ImportError):
CSRMatrix(data=data)


@unittest.skipIf(not SCIPY_INSTALLED, "scipy is not installed")
class TestCSRMatrix(TestCase):

def test_from_sparse_matrix(self):
Expand Down Expand Up @@ -153,7 +168,7 @@ def test_array_bad_dim(self):
with self.assertRaisesWith(ValueError, msg):
CSRMatrix(data=data, indices=indices, indptr=indptr, shape=shape)


@unittest.skipIf(not SCIPY_INSTALLED, "scipy is not installed")
class TestCSRMatrixRoundTrip(H5RoundTripMixin, TestCase):

def setUpContainer(self):
Expand All @@ -164,6 +179,7 @@ def setUpContainer(self):
return CSRMatrix(data=data, indices=indices, indptr=indptr, shape=shape)


@unittest.skipIf(not SCIPY_INSTALLED, "scipy is not installed")
class TestCSRMatrixRoundTripFromLists(H5RoundTripMixin, TestCase):
"""Test that CSRMatrix works with lists as well"""

Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ extras =
# which optional dependency set(s) to use (default: none)
pytest: test
gallery: doc
optional: tqdm,zarr,termset
optional: tqdm,sparse,zarr,termset
commands =
# commands to run for every environment
python --version # print python version for debugging
Expand Down

0 comments on commit b2e5d75

Please sign in to comment.