Skip to content

Commit

Permalink
added example using field selection
Browse files Browse the repository at this point in the history
  • Loading branch information
jreadey committed Oct 30, 2024
1 parent c30fb5c commit 822971e
Showing 1 changed file with 212 additions and 0 deletions.
212 changes: 212 additions & 0 deletions examples/notebooks/compound_type_example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compound type field selection\n",
"\n",
"This notebook creates a dataset with a 260-field compound type and compares two ways of reading a subset of its fields:\n",
"\n",
"1. read all fields with `dset[:]` and then select the wanted fields from the numpy array\n",
"2. use `dset.fields(names)[:]` so that only the wanted fields are returned\n",
"\n",
"Set `USE_H5PY` in the first code cell to run against a local HDF5 file with h5py instead of using h5pyd."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"\n",
"import numpy as np\n",
"\n",
"# Set USE_H5PY to True to use h5py with a local file;\n",
"# leave it False to use h5pyd (imported under the name h5py)\n",
"USE_H5PY = False\n",
"\n",
"if not USE_H5PY:\n",
"    import h5pyd as h5py\n",
"    filepath = \"/home/test_user1/test/compound.h5\"\n",
"else:\n",
"    import h5py\n",
"    filepath = \"./compound.h5\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Create a new file (h5py) or domain (h5pyd) and open it for writing\n",
"f = h5py.File(filepath, mode=\"w\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Build a compound numpy dtype made of 260 fixed-width (6-byte) string fields\n",
"# named A0, A1, A2, ..., Z7, Z8, Z9\n",
"letters = [chr(ord('A') + n) for n in range(26)]\n",
"digits = [str(n) for n in range(10)]\n",
"fields = [(letter + digit, \"S6\") for letter in letters for digit in digits]\n",
"dt = np.dtype(fields)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<HDF5 dataset \"dset\": shape (10000,), type \"|V1560\">"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a one-dimensional dataset with NUM_ROWS elements of the compound dtype\n",
"NUM_ROWS = 10_000\n",
"dset = f.create_dataset(\"dset\", shape=(NUM_ROWS,), dtype=dt)\n",
"dset\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Write some values into the dataset.\n",
"# Each value is the zero-padded row number followed by the field name,\n",
"# e.g. b\"0042C4\" for row 42 of field C4. With NUM_ROWS = 10000 that is\n",
"# 4 digits + a 2-character name = 6 bytes, which exactly fills an S6 field.\n",
"# (A longer value such as \"1000_C4\" would be silently truncated by numpy\n",
"# to b\"1000_C\", losing part of the field name.)\n",
"arr = np.zeros((NUM_ROWS,), dtype=dt)\n",
"for i in range(NUM_ROWS):\n",
"    row = arr[i]  # assigning to the fields of row updates arr in place\n",
"    for name in dt.names:\n",
"        row[name] = f\"{i:04d}{name}\".encode()\n",
"dset[:] = arr[:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pick a random set of distinct field names.\n",
"# random.sample draws without replacement, so exactly k names are returned\n",
"# and no de-duplication step is needed\n",
"names = random.sample(dt.names, k=10)\n",
"names"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 27.1 ms, sys: 16.1 ms, total: 43.2 ms\n",
"Wall time: 93.8 ms\n"
]
},
{
"data": {
"text/plain": [
"array([(b'000_C4', b'000_P6', b'000_V0', b'000_S8', b'000_P4', b'000_B5', b'000_L1', b'000_E7'),\n",
" (b'001_C4', b'001_P6', b'001_V0', b'001_S8', b'001_P4', b'001_B5', b'001_L1', b'001_E7'),\n",
" (b'002_C4', b'002_P6', b'002_V0', b'002_S8', b'002_P4', b'002_B5', b'002_L1', b'002_E7'),\n",
" ...,\n",
" (b'9997_C', b'9997_P', b'9997_V', b'9997_S', b'9997_P', b'9997_B', b'9997_L', b'9997_E'),\n",
" (b'9998_C', b'9998_P', b'9998_V', b'9998_S', b'9998_P', b'9998_B', b'9998_L', b'9998_E'),\n",
" (b'9999_C', b'9999_P', b'9999_V', b'9999_S', b'9999_P', b'9999_B', b'9999_L', b'9999_E')],\n",
" dtype={'names': ['C4', 'P6', 'V0', 'S8', 'P4', 'B5', 'L1', 'E7'], 'formats': ['S6', 'S6', 'S6', 'S6', 'S6', 'S6', 'S6', 'S6'], 'offsets': [144, 936, 1260, 1128, 924, 90, 666, 282], 'itemsize': 1560})"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read all fields of every row, then select the wanted fields from the resulting numpy array\n",
"%time dset[:][names]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.92 ms, sys: 0 ns, total: 3.92 ms\n",
"Wall time: 20.7 ms\n"
]
},
{
"data": {
"text/plain": [
"array([(b'000_C4', b'000_P6', b'000_V0', b'000_S8', b'000_P4', b'000_B5', b'000_L1', b'000_E7'),\n",
" (b'001_C4', b'001_P6', b'001_V0', b'001_S8', b'001_P4', b'001_B5', b'001_L1', b'001_E7'),\n",
" (b'002_C4', b'002_P6', b'002_V0', b'002_S8', b'002_P4', b'002_B5', b'002_L1', b'002_E7'),\n",
" ...,\n",
" (b'9997_C', b'9997_P', b'9997_V', b'9997_S', b'9997_P', b'9997_B', b'9997_L', b'9997_E'),\n",
" (b'9998_C', b'9998_P', b'9998_V', b'9998_S', b'9998_P', b'9998_B', b'9998_L', b'9998_E'),\n",
" (b'9999_C', b'9999_P', b'9999_V', b'9999_S', b'9999_P', b'9999_B', b'9999_L', b'9999_E')],\n",
" dtype=[('C4', 'S6'), ('P6', 'S6'), ('V0', 'S6'), ('S8', 'S6'), ('P4', 'S6'), ('B5', 'S6'), ('L1', 'S6'), ('E7', 'S6')])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Have HSDS (or the HDF5 library) return just the values for the given set of field names.\n",
"# This returns the same values as the cell above, but should be faster as less data needs\n",
"# to be transferred.\n",
"%time dset.fields(names)[:]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "hs",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 822971e

Please sign in to comment.