added example using field selection

HDFGroup · Oct 30, 2024 · 822971e · 822971e
1 parent c30fb5c
commit 822971e
Showing 1 changed file with 212 additions and 0 deletions.
diff --git a/examples/notebooks/compound_type_example.ipynb b/examples/notebooks/compound_type_example.ipynb
@@ -0,0 +1,212 @@
+{
+ "cells": [
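+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Compound type field selection\n",
+    "\n",
+    "Creates a dataset with a 260-field compound type, then compares two ways of reading a subset of its fields: reading whole rows and selecting the fields with numpy, versus `dset.fields(names)`, which requests only those fields."
+   ]
+  },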
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "import numpy as np\n",
+    "USE_H5PY=False\n",
+    "if USE_H5PY:\n",
+    "    import h5py\n",
+    "    filepath = \"./compound.h5\"\n",
+    "else:\n",
+    "    import h5pyd as h5py\n",
+    "    filepath = \"/home/test_user1/test/compound.h5\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a new HSDS domain (h5pyd) or HDF5 file (h5py); mode \"w\" replaces any existing one\n",
+    "f = h5py.File(filepath, \"w\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build a compound numpy dtype made of 260 six-byte string fields\n",
+    "# named A0, A1, A2, ..., Z7, Z8, Z9 (letter A-Z followed by digit 0-9)\n",
+    "fields = [\n",
+    "    (chr(ord('A') + letter_idx) + chr(ord('0') + digit_idx), \"S6\")\n",
+    "    for letter_idx in range(26)\n",
+    "    for digit_idx in range(10)\n",
+    "]\n",
+    "dt = np.dtype(fields)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<HDF5 dataset \"dset\": shape (10000,), type \"|V1560\">"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# create a dataset using the dtype\n",
+    "NUM_ROWS = 10000\n",
+    "dset = f.create_dataset(\"dset\", (NUM_ROWS,), dtype=dt)\n",
+    "dset\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# write some values into the dataset\n",
+    "# Each value is the zero-padded row index followed by the field name\n",
+    "# (e.g. b'0042C4'). The index is padded to the width of the largest row\n",
+    "# number so every value has the same length and fits the fixed-width field;\n",
+    "# a longer value would be silently truncated when assigned.\n",
+    "index_width = len(str(NUM_ROWS - 1))\n",
+    "for name in dt.names:\n",
+    "    if index_width + len(name) > dt[name].itemsize:\n",
+    "        raise ValueError(f\"values for field {name} would be truncated\")\n",
+    "arr = np.zeros((NUM_ROWS,), dtype=dt)\n",
+    "for i in range(NUM_ROWS):\n",
+    "    row = arr[i]\n",
+    "    for name in dt.names:\n",
+    "        row[name] = f\"{i:0{index_width}d}{name}\".encode()\n",
+    "dset[:] = arr[:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['C4', 'P6', 'V0', 'S8', 'P4', 'B5', 'L1', 'E7']"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Pick a random subset of field names to read back.\n",
+    "# random.choices() samples with replacement, so duplicates are dropped via set();\n",
+    "# k is therefore the maximum number of names, not the exact count.\n",
+    "names = list(set(random.choices(dt.names, k=10)))\n",
+    "names"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 27.1 ms, sys: 16.1 ms, total: 43.2 ms\n",
+      "Wall time: 93.8 ms\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([(b'000_C4', b'000_P6', b'000_V0', b'000_S8', b'000_P4', b'000_B5', b'000_L1', b'000_E7'),\n",
+       "       (b'001_C4', b'001_P6', b'001_V0', b'001_S8', b'001_P4', b'001_B5', b'001_L1', b'001_E7'),\n",
+       "       (b'002_C4', b'002_P6', b'002_V0', b'002_S8', b'002_P4', b'002_B5', b'002_L1', b'002_E7'),\n",
+       "       ...,\n",
+       "       (b'9997_C', b'9997_P', b'9997_V', b'9997_S', b'9997_P', b'9997_B', b'9997_L', b'9997_E'),\n",
+       "       (b'9998_C', b'9998_P', b'9998_V', b'9998_S', b'9998_P', b'9998_B', b'9998_L', b'9998_E'),\n",
+       "       (b'9999_C', b'9999_P', b'9999_V', b'9999_S', b'9999_P', b'9999_B', b'9999_L', b'9999_E')],\n",
+       "      dtype={'names': ['C4', 'P6', 'V0', 'S8', 'P4', 'B5', 'L1', 'E7'], 'formats': ['S6', 'S6', 'S6', 'S6', 'S6', 'S6', 'S6', 'S6'], 'offsets': [144, 936, 1260, 1128, 924, 90, 666, 282], 'itemsize': 1560})"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Read every field of every row from the dataset, then select the requested fields locally with numpy\n",
+    "%time dset[:][names]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 3.92 ms, sys: 0 ns, total: 3.92 ms\n",
+      "Wall time: 20.7 ms\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([(b'000_C4', b'000_P6', b'000_V0', b'000_S8', b'000_P4', b'000_B5', b'000_L1', b'000_E7'),\n",
+       "       (b'001_C4', b'001_P6', b'001_V0', b'001_S8', b'001_P4', b'001_B5', b'001_L1', b'001_E7'),\n",
+       "       (b'002_C4', b'002_P6', b'002_V0', b'002_S8', b'002_P4', b'002_B5', b'002_L1', b'002_E7'),\n",
+       "       ...,\n",
+       "       (b'9997_C', b'9997_P', b'9997_V', b'9997_S', b'9997_P', b'9997_B', b'9997_L', b'9997_E'),\n",
+       "       (b'9998_C', b'9998_P', b'9998_V', b'9998_S', b'9998_P', b'9998_B', b'9998_L', b'9998_E'),\n",
+       "       (b'9999_C', b'9999_P', b'9999_V', b'9999_S', b'9999_P', b'9999_B', b'9999_L', b'9999_E')],\n",
+       "      dtype=[('C4', 'S6'), ('P6', 'S6'), ('V0', 'S6'), ('S8', 'S6'), ('P4', 'S6'), ('B5', 'S6'), ('L1', 'S6'), ('E7', 'S6')])"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Have HSDS (or the HDF5 library) return only the values for the given field names.\n",
+    "# This returns the same values as the cell above, but should be faster because\n",
+    "# less data needs to be transferred.\n",
+    "%time dset.fields(names)[:]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hs",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}