Skip to content

Commit

Permalink
Field selection for simple compound types (#173)
Browse files Browse the repository at this point in the history
* Field selection for simple compound types

* Add logging to test_datatype
  • Loading branch information
mattjala authored Mar 26, 2024
1 parent 4162811 commit b61fa0e
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 29 deletions.
37 changes: 13 additions & 24 deletions h5pyd/_hl/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,9 @@ def __getitem__(self, args, new_dtype=None):
req = "/datasets/" + self.id.uuid + "/value"
params = {}

if len(names) > 0:
params["fields"] = ":".join(names)

if self.id._http_conn.mode == "r" and self.id._http_conn.cache_on:
# enables lambda to be used on server
self.log.debug("setting nonstrict parameter")
Expand Down Expand Up @@ -1483,41 +1486,23 @@ def __setitem__(self, args, val):
last N dimensions have to match (got %s, but should be %s)" % (valshp, shp,))
mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
mshape = val.shape[0:len(val.shape)-len(shp)]
"""

# Make a compound memory type if field-name slicing is required
elif len(names) != 0:
mshape = val.shape
# Check for field selection
if len(names) != 0:
# Catch common errors
if self.dtype.fields is None:
raise TypeError("Illegal slicing argument (not a compound dataset)")
mismatch = [x for x in names if x not in self.dtype.fields]
if len(mismatch) != 0:
mismatch = ", ".join('"%s"'%x for x in mismatch)
mismatch = ", ".join('"%s"' % x for x in mismatch)
raise ValueError("Illegal slicing argument (fields %s not in dataset type)" % mismatch)

# Write non-compound source into a single dataset field
if len(names) == 1 and val.dtype.fields is None:
subtype = h5y.py_create(val.dtype)
mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
mtype.insert(self._e(names[0]), 0, subtype)
# Make a new source type keeping only the requested fields
else:
fieldnames = [x for x in val.dtype.names if x in names] # Keep source order
mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
for fieldname in fieldnames:
subtype = h5t.py_create(val.dtype.fields[fieldname][0])
offset = val.dtype.fields[fieldname][1]
mtype.insert(self._e(fieldname), offset, subtype)
# Use mtype derived from array (let DatasetID.write figure it out)
else:
mshape = val.shape
#mtype = None
"""
# mtype = None

mshape = val.shape
self.log.debug(f"mshape: {mshape}")
self.log.debug(f"data dtype: {val.dtype}")
Expand Down Expand Up @@ -1582,6 +1567,10 @@ def __setitem__(self, args, val):
self.log.debug(f"got select query param: {select_param}")
params["select"] = select_param

# Perform write to subset of named fields within compound datatype, if any
if len(names) > 0:
params["fields"] = ":".join(names)

self.PUT(req, body=body, format=format, params=params)
"""
mspace = h5s.create_simple(mshape_pad, (h5s.UNLIMITED,)*len(mshape_pad))
Expand Down
13 changes: 8 additions & 5 deletions h5pyd/_hl/h5type.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,10 +441,14 @@ def getTypeItem(dt):
type_info['length'] = 'H5T_VARIABLE'
type_info['charSet'] = 'H5T_CSET_UTF8'
type_info['strPad'] = 'H5T_STR_NULLTERM'
elif vlen_check == int:
elif vlen_check in (int, np.int64):
type_info['class'] = 'H5T_VLEN'
type_info['size'] = 'H5T_VARIABLE'
type_info['base'] = 'H5T_STD_I64'
elif vlen_check == np.int32:
type_info['class'] = 'H5T_VLEN'
type_info['size'] = 'H5T_VARIABLE'
type_info['base'] = 'H5T_STD_I32'
elif vlen_check in (float, np.float64):
type_info['class'] = 'H5T_VLEN'
type_info['size'] = 'H5T_VARIABLE'
Expand All @@ -456,7 +460,7 @@ def getTypeItem(dt):
type_info['base'] = getTypeItem(vlen_check)
elif vlen_check is not None:
# unknown vlen type
raise TypeError("Unknown h5py vlen type: " + str(vlen_check))
raise TypeError("Unknown h5pyd vlen type: " + str(vlen_check))
elif ref_check is not None:
# a reference type
type_info['class'] = 'H5T_REFERENCE'
Expand Down Expand Up @@ -781,7 +785,7 @@ def createBaseDataType(typeItem):
raise TypeError("ArrayType is not supported for variable len types")
if 'base' not in typeItem:
raise KeyError("'base' not provided")
baseType = createBaseDataType(typeItem['base'])
baseType = createDataType(typeItem['base'])
dtRet = special_dtype(vlen=np.dtype(baseType))
elif typeClass == 'H5T_OPAQUE':
if dims:
Expand Down Expand Up @@ -842,9 +846,8 @@ def createBaseDataType(typeItem):
else:
# not a boolean enum, use h5py special dtype
dtRet = special_dtype(enum=(dt, mapping))

else:
raise TypeError("Invalid type class")
raise TypeError(f"Invalid base type class: {typeClass}")

return dtRet

Expand Down
157 changes: 157 additions & 0 deletions test/hl/test_datatype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
##############################################################################
# Copyright by The HDF Group. #
# All rights reserved. #
# #
# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and #
# Utilities. The full HDF5 REST Server copyright notice, including #
# terms governing use, modification, and redistribution, is contained in #
# the file COPYING, which can be found at the root of the source code #
# distribution tree. If you do not have access to this file, you may #
# request a copy from help@hdfgroup.org. #
##############################################################################

import numpy as np
import math
import logging
import config

if config.get("use_h5py"):
import h5py
else:
import h5pyd as h5py

from common import ut, TestCase


class TestScalarCompound(TestCase):
    """Tests for datasets whose element type is a compound (structured) dtype.

    ``setUp`` creates a fresh file containing a scalar compound dataset
    named ``x``; several tests additionally create a 1-D compound dataset
    named ``test`` in the same file.
    """

    def setUp(self):
        """Open a new file and store one scalar record with three fields."""
        filename = self.getFileName("scalar_compound_dset")
        print("filename:", filename)
        self.f = h5py.File(filename, "w")
        self.data = np.array((42.5, -118, "Hello"), dtype=[('a', 'f'), ('b', 'i'), ('c', '|S10')])
        self.dset = self.f.create_dataset('x', data=self.data)

    def tearDown(self):
        """Close the file opened in setUp so handles do not pile up across tests."""
        self.f.close()
        super().tearDown()

    @staticmethod
    def _random_records(dt, count=16):
        """Return a 1-D array of ``count`` records of dtype ``dt``.

        Every field is filled with random values drawn from [0, 100) and
        cast to that field's type on assignment.
        """
        records = np.empty((count,), dtype=dt)
        for name in dt.fields:
            records[name] = np.random.random(size=records[name].shape) * 100
        return records

    def test_ndim(self):
        """ Verify number of dimensions """
        self.assertEqual(self.dset.ndim, 0)

    def test_shape(self):
        """ Verify shape """
        self.assertEqual(self.dset.shape, tuple())

    def test_size(self):
        """ Verify size """
        self.assertEqual(self.dset.size, 1)

    def test_ellipsis(self):
        """ Ellipsis -> scalar ndarray """
        out = self.dset[...]
        # assertArrayEqual doesn't work with compounds; do manually
        self.assertIsInstance(out, np.ndarray)
        self.assertEqual(out.shape, self.data.shape)
        self.assertEqual(out.dtype, self.data.dtype)

    def test_tuple(self):
        """ () -> np.void instance """
        out = self.dset[()]
        self.assertIsInstance(out, np.void)
        self.assertEqual(out.dtype, self.data.dtype)

    def test_slice(self):
        """ slice -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0:4]

    def test_index(self):
        """ index -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0]

    def test_rt(self):
        """ Compound types are read back in correct order (h5py issue 236)"""

        dt = np.dtype([('weight', np.float64),
                       ('cputime', np.float64),
                       ('walltime', np.float64),
                       ('parents_offset', np.uint32),
                       ('n_parents', np.uint32),
                       ('status', np.uint8),
                       ('endpoint_type', np.uint8),])

        testdata = self._random_records(dt)

        # Round trip: assign the whole array to a new dataset, then read it back.
        self.f['test'] = testdata
        outdata = self.f['test'][...]
        self.assertTrue(np.all(outdata == testdata))
        self.assertEqual(outdata.dtype, testdata.dtype)

    def test_assign(self):
        """ Writing one named field at a time fills the whole compound dataset """
        dt = np.dtype([('weight', (np.float64)),
                       ('endpoint_type', np.uint8),])

        testdata = self._random_records(dt)

        ds = self.f.create_dataset('test', (16,), dtype=dt)
        # Field-name selection on write: each assignment targets a single field.
        for key in dt.fields:
            ds[key] = testdata[key]

        outdata = self.f['test'][...]

        self.assertTrue(np.all(outdata == testdata))
        self.assertEqual(outdata.dtype, testdata.dtype)

    def test_read(self):
        """ Reading by field name returns just that field's values and dtype """
        dt = np.dtype([('weight', (np.float64)),
                       ('endpoint_type', np.uint8),])

        testdata = self._random_records(dt)

        ds = self.f.create_dataset('test', (16,), dtype=dt)

        # Write to all fields
        ds[...] = testdata

        # Field-name selection on read: each lookup fetches a single field.
        for key in dt.fields:
            outdata = self.f['test'][key]
            np.testing.assert_array_equal(outdata, testdata[key])
            self.assertEqual(outdata.dtype, testdata[key].dtype)

"""
TBD: this test is currently disabled (kept inside a string literal so it does not run)
def test_nested_compound_vlen(self):
dt_inner = np.dtype([('a', h5py.vlen_dtype(np.int32)),
('b', h5py.vlen_dtype(np.int32))])
dt = np.dtype([('f1', h5py.vlen_dtype(dt_inner)),
('f2', np.int64)])
inner1 = (np.array(range(1, 3), dtype=np.int32),
np.array(range(6, 9), dtype=np.int32))
inner2 = (np.array(range(10, 14), dtype=np.int32),
np.array(range(16, 21), dtype=np.int32))
data = np.array([(np.array([inner1, inner2], dtype=dt_inner), 2),
(np.array([inner1], dtype=dt_inner), 3)],
dtype=dt)
self.f["ds"] = data
out = self.f["ds"]
# Specifying check_alignment=False because vlen fields have 8 bytes of padding
# because the vlen datatype in hdf5 occupies 16 bytes
self.assertArrayEqual(out, data, check_alignment=False)
"""


if __name__ == '__main__':
    # When run directly as a script, log errors only and hand off to the test runner.
    logging.basicConfig(level=logging.ERROR, format='%(asctime)s %(message)s')
    ut.main()
3 changes: 3 additions & 0 deletions testall.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import os
import sys


hl_tests = ('test_attribute',
'test_committedtype',
'test_complex_numbers',
Expand All @@ -26,6 +27,7 @@
'test_dataset_pointselect',
'test_dataset_scalar',
'test_dataset_setitem',
'test_datatype',
'test_dimscale',
'test_file',
'test_group',
Expand All @@ -34,6 +36,7 @@
'test_vlentype',
'test_folder')


app_tests = ('test_hsinfo', 'test_tall_inspect', 'test_diamond_inspect',
'test_shuffle_inspect')

Expand Down

0 comments on commit b61fa0e

Please sign in to comment.