From 2d6e7f898ae8c9ff0b2c78bb4f5aaf9a8caa305c Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi <luigi.pertoldi@protonmail.com>
Date: Tue, 23 May 2023 12:42:22 +0200
Subject: [PATCH] Various documentation fixes (#2)

* Various docs fixes
* Fix import issue
* Add pylegendtestdata to docs requirements (for notebooks)
* Install pandoc on GitHub action runners
* Remove problematic "language" setting in Sphinx conf
* Update docstrings

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .github/workflows/main.yml                    |   5 +-
 .gitignore                                    |   1 +
 .pre-commit-config.yaml                       |  12 +-
 README.md                                     |   1 +
 docs/source/conf.py                           |   7 +-
 docs/source/index.rst                         |  41 ++-
 docs/source/notebooks/LH5Files.ipynb          | 328 ++++++++++++++++++
 docs/source/tutorials.rst                     |  11 +
 setup.cfg                                     |   2 +-
 src/lgdo/__init__.py                          |  28 +-
 src/lgdo/compression/__init__.py              |   4 +-
 src/lgdo/compression/radware.py               |   3 +-
 src/lgdo/compression/varlen.py                |   1 -
 src/lgdo/lgdo_utils.py                        |   2 +-
 src/lgdo/lh5_store.py                         |  16 +-
 src/lgdo/types/__init__.py                    |  40 +--
 src/lgdo/types/encoded.py                     |   6 +-
 src/lgdo/types/fixedsizearray.py              |   2 +-
 src/lgdo/types/scalar.py                      |   4 +-
 src/lgdo/types/struct.py                      |   2 +-
 src/lgdo/types/table.py                       |   5 +-
 src/lgdo/types/vectorofvectors.py             |   2 +-
 src/lgdo/types/waveform_table.py              |   2 +-
 tests/compression/test_radware_sigcompress.py |   6 +-
 tests/conftest.py                             |   3 -
 tests/types/test_array.py                     |   4 +-
 tests/types/test_vectorofvectors.py           |   9 +-
 27 files changed, 441 insertions(+), 106 deletions(-)
 create mode 100644 docs/source/notebooks/LH5Files.ipynb
 create mode 100644 docs/source/tutorials.rst

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 832a1c84..66d8ff1d 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -39,7 +39,6 @@ jobs:
 
   test-coverage:
     name: Calculate and upload test coverage
-    needs: build-and-test
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
@@ -59,7 +58,6 @@ jobs:
 
   build-docs:
     name: Build documentation
-    needs: build-and-test
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
@@ -70,8 +68,9 @@ jobs:
           python-version: '3.10'
       - name: Setup build environment
         run: |
+          sudo apt-get install -y pandoc
           python -m pip install --upgrade pip wheel setuptools
-          python -m pip install --upgrade .[docs]
+          python -m pip install --upgrade .[all]
       - name: Build docs for current ref
         run: |
           cd docs
diff --git a/.gitignore b/.gitignore
index a6891538..10009fad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
+*.lh5
 
 # C extensions
 *.so
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b5ba1f5d..b3987f70 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,12 +37,12 @@ repos:
   rev: "v3.3.1"
   hooks:
   - id: pyupgrade
-    args: ["--py36-plus"]
+    args: ["--py38-plus"]
 
 - repo: https://github.com/psf/black
   rev: "23.3.0"
   hooks:
-  - id: black
+  - id: black-jupyter
 
 - repo: https://github.com/pre-commit/mirrors-mypy
   rev: "v1.1.1"
@@ -62,18 +62,18 @@ repos:
   hooks:
   - id: flake8
     additional_dependencies: [
-      flake8-bugbear,
+      flake8-bugbear>=23.1.17,
       flake8-print,
-      flake8-docstrings,
+      # flake8-docstrings,
       pep8-naming
     ]
-    args: ["--docstring-convention", "numpy"]  # or google, change me
+    # args: ["--docstring-convention", "numpy"]  # or google, change me
 
 - repo: https://github.com/kynan/nbstripout
   rev: "0.6.1"
   hooks:
     - id: nbstripout
-      args: ["--strip-empty-cells",
+      args: ["--drop-empty-cells",
              "--extra-keys", "metadata.kernelspec metadata.language_info"]
 
 - repo: https://github.com/mgedmin/check-manifest
diff --git a/README.md b/README.md
index bfc42ce5..c0f6dc3b 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 # legend-pydataobj
 
+[![PyPI](https://img.shields.io/pypi/v/legend-pydataobj?logo=pypi)](https://pypi.org/project/legend-pydataobj/)
 ![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/legend-exp/legend-pydataobj?logo=git)
 [![GitHub Workflow Status](https://img.shields.io/github/checks-status/legend-exp/legend-pydataobj/main?label=main%20branch&logo=github)](https://github.com/legend-exp/legend-pydataobj/actions)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 1b2ecdbf..f96d8528 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -8,7 +8,7 @@
 sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix())
 sys.path.append(Path("extensions").resolve().as_posix())
 
-project = "lgdo"
+project = "legend-pydataobj"
 copyright = "2023, the LEGEND Collaboration"
 version = get_distribution("legend-pydataobj").version
 
@@ -30,7 +30,6 @@
     ".md": "markdown",
 }
 master_doc = "index"
-language = "python"
 
 # Furo theme
 html_theme = "furo"
@@ -71,8 +70,8 @@
 ----
 
 This page has been automatically generated by nbsphinx_ and can be run as a
-Jupyter_ notebook available in the `pygama repository
-<https://github.com/legend-exp/pygama/tree/main/docs/source/notebooks>`_.
+Jupyter_ notebook available in the `legend-pydataobj repository
+<https://github.com/legend-exp/legend-pydataobj/tree/main/docs/source/notebooks>`_.
 
 .. _nbsphinx: https://nbsphinx.readthedocs.io/
 .. _Jupyter: https://jupyter.org/
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 9c8dd74d..3073bf24 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,10 +1,43 @@
-Welcome to lgdo's documentation!
-==========================================
+LEGEND Data Objects
+===================
 
-Table of Contents
------------------
+|legend-pydataobj| is a Python implementation of the `LEGEND Data Format Specification <1_>`_.
+
+Getting started
+---------------
+
+|legend-pydataobj| is published on the `Python Package Index <2_>`_. Install on
+local systems with `pip <3_>`_:
+
+.. tab:: Stable release
+
+    .. code-block:: console
+
+        $ pip install legend-pydataobj
+
+.. tab:: Unstable (``main`` branch)
+
+    .. code-block:: console
+
+        $ pip install legend-pydataobj@git+https://github.com/legend-exp/legend-pydataobj@main
+
+.. tab:: Linux Containers
+
+    Get a LEGEND container with |legend-pydataobj| pre-installed on `Docker hub
+    <https://hub.docker.com/r/legendexp/legend-software>`_ or follow
+    instructions on the `LEGEND wiki
+    <https://legend-exp.atlassian.net/l/cp/nF1ww5KH>`_.
+
+Next steps
+----------
 
 .. toctree::
    :maxdepth: 1
 
+   tutorials
    Package API reference <api/modules>
+
+.. _1: https://legend-exp.github.io/legend-data-format-specs
+.. _2: https://pypi.org/project/legend-pydataobj
+.. _3: https://pip.pypa.io/en/stable/getting-started
+.. |legend-pydataobj| replace:: *legend-pydataobj*
diff --git a/docs/source/notebooks/LH5Files.ipynb b/docs/source/notebooks/LH5Files.ipynb
new file mode 100644
index 00000000..8563f4bd
--- /dev/null
+++ b/docs/source/notebooks/LH5Files.ipynb
@@ -0,0 +1,328 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "28a2ded5",
+   "metadata": {},
+   "source": [
+    "# Handling LH5 data\n",
+    "\n",
+    "LEGEND stores its data in [HDF5](https://www.hdfgroup.org/solutions/hdf5) format, a high-performance data format becoming popular in experimental physics. LEGEND Data Objects (LGDO) are represented as HDF5 objects according to a custom specification, documented [here](https://legend-exp.github.io/legend-data-format-specs/dev/hdf5)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "701caca4",
+   "metadata": {},
+   "source": [
+    "## Reading data from disk\n",
+    "\n",
+    "Let's start by downloading a small test LH5 file with the [pylegendtestdata](https://pypi.org/project/pylegendtestdata/) package (it takes a while depending on your internet connection):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce298181",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from legendtestdata import LegendTestData\n",
+    "\n",
+    "ldata = LegendTestData()\n",
+    "lh5_file = ldata.get_path(\"lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c136b537",
+   "metadata": {},
+   "source": [
+    "We can use `lgdo.lh5_store.ls()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.ls) to inspect the file contents:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb629856",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lgdo import ls\n",
+    "\n",
+    "ls(lh5_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "890676b6",
+   "metadata": {},
+   "source": [
+    "This particular file contains an HDF5 group (they behave like directories). The second argument of `ls()` can be used to inspect a group (without the trailing `/`, only the group name is returned, if it exists):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22218548",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls(lh5_file, \"geds/\")  # returns ['geds/raw'], which is a group again\n",
+    "ls(lh5_file, \"geds/raw/\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4fd29157",
+   "metadata": {},
+   "source": [
+    "<div class=\"alert alert-info\">\n",
+    "\n",
+    "**Note:** As an alternative to `ls()`, `show()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.show) prints a nice representation of the LH5 file contents (with LGDO types) on screen:\n",
+    "\n",
+    "</div>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a382fcbd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lgdo import show\n",
+    "\n",
+    "show(lh5_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ef09f43c",
+   "metadata": {},
+   "source": [
+    "The group contains several LGDOs. Let's read them in memory. We start by initializing an `LH5Store` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store) object:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f4e2525",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lgdo import LH5Store\n",
+    "\n",
+    "store = LH5Store()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "60ce5b61",
+   "metadata": {},
+   "source": [
+    "`read_object()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.read_object) reads an LGDO from disk and returns the object in memory together with the number of rows (as a tuple), if an object has such a property. Let's try to read `geds/raw`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a8b9ca4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "store.read_object(\"geds/raw\", lh5_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9efdb924",
+   "metadata": {},
+   "source": [
+    "As shown by the type signature, it is interpreted as a `Table` with 100 rows. Its contents (or \"columns\") can therefore be viewed as LGDO objects of the same length. For example `timestamp`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "da22855d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file)\n",
+    "obj"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f2c8ff34",
+   "metadata": {},
+   "source": [
+    "is an LGDO `Array` with 100 elements.\n",
+    "\n",
+    "`read_object()` also supports more advanced data reading. For example, let's read only 10 rows starting from row 15:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce246705",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file, start_row=15, n_rows=10)\n",
+    "print(obj)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "195ebefd",
+   "metadata": {},
+   "source": [
+    "Or, let's read only columns `timestamp` and `energy` from the `geds/raw` table and rows `[1, 3, 7, 9, 10, 15]`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eaa06ebe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "obj, n_rows = store.read_object(\n",
+    "    \"geds/raw\", lh5_file, field_mask=(\"timestamp\", \"energy\"), idx=[1, 3, 7, 9, 10, 15]\n",
+    ")\n",
+    "print(obj)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3f52d77",
+   "metadata": {},
+   "source": [
+    "As you might have noticed, `read_object()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8f24d4e3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lgdo import LH5Iterator\n",
+    "\n",
+    "for lh5_obj, entry, n_rows in LH5Iterator(lh5_file, \"geds/raw/energy\", buffer_len=20):\n",
+    "    print(f\"entry {entry}, energy = {lh5_obj} ({n_rows} rows)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ab3794c",
+   "metadata": {},
+   "source": [
+    "## Writing data to disk\n",
+    "\n",
+    "Let's start by creating some LGDOs:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aaf2d352",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lgdo import Array, Scalar, WaveformTable\n",
+    "import numpy as np\n",
+    "\n",
+    "rng = np.random.default_rng(12345)\n",
+    "\n",
+    "scalar = Scalar(\"made with legend-pydataobj!\")\n",
+    "array = Array(rng.random(size=10))\n",
+    "wf_table = WaveformTable(values=rng.integers(low=1000, high=5000, size=(10, 1000)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cbfd91c2",
+   "metadata": {},
+   "source": [
+    "The `write_object()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.write_object) method of `LH5Store` makes it possible to write LGDO objects to disk. Let's start by writing `scalar` with name `message` in a file named `my_objects.lh5` in the current directory:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5ad1d10e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "store = LH5Store()\n",
+    "\n",
+    "store.write_object(\n",
+    "    scalar, name=\"message\", lh5_file=\"my_objects.lh5\", wo_mode=\"overwrite_file\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "430c2c7e",
+   "metadata": {},
+   "source": [
+    "Let's now inspect the file contents:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "777d7daf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lgdo import show\n",
+    "\n",
+    "show(\"my_objects.lh5\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "227bea86",
+   "metadata": {},
+   "source": [
+    "The string object has been written at the root of the file `/`. Let's now also write `array` and `wf_table`, this time in an HDF5 group called `closet`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19c7703c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "store.write_object(array, name=\"numbers\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
+    "store.write_object(\n",
+    "    wf_table, name=\"waveforms\", group=\"closet\", lh5_file=\"my_objects.lh5\"\n",
+    ")\n",
+    "show(\"my_objects.lh5\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e266c2ac",
+   "metadata": {},
+   "source": [
+    "Everything looks right!\n",
+    "\n",
+    "<div class=\"alert alert-info\">\n",
+    "\n",
+    "**Note:** `write_object()` allows for more advanced usage, like writing only some rows of the input object or appending to existing array-like structures. Have a look at the [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.write_object) for more information.\n",
+    "\n",
+    "</div>"
+   ]
+  }
+ ],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
new file mode 100644
index 00000000..88bc4990
--- /dev/null
+++ b/docs/source/tutorials.rst
@@ -0,0 +1,11 @@
+Tutorials (Jupyter notebooks)
+=============================
+
+Tutorials are currently available as `Jupyter notebooks on GitHub
+<https://github.com/legend-exp/legend-pydataobj/tree/main/docs/source/notebooks>`_.
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   notebooks/LH5Files
diff --git a/setup.cfg b/setup.cfg
index d77b11f3..50e9674b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -33,9 +33,9 @@ packages = find:
 install_requires =
     h5py>=3.2.0
     hdf5plugin
-    numpy>=1.21
     numba!=0.53.*,!=0.54.*
     numexpr
+    numpy>=1.21
     pandas>=1.4.4
     parse
     pint
diff --git a/src/lgdo/__init__.py b/src/lgdo/__init__.py
index 8cc57204..c2931b98 100644
--- a/src/lgdo/__init__.py
+++ b/src/lgdo/__init__.py
@@ -1,10 +1,10 @@
 """
-Pygama works with "LEGEND Data Objects" (LGDO) defined in the `LEGEND data
-format specification <https://github.com/legend-exp/legend-data-format-specs>`_.
-This subpackage serves as the Python implementation of that specification. The
-general strategy for the implementation is to dress standard Python and NumPy
-objects with an ``attr`` dictionary holding LGDO metadata, plus some convenience
-functions. The basic data object classes are:
+LEGEND Data Objects (LGDO) are defined in the `LEGEND data format specification
+<https://github.com/legend-exp/legend-data-format-specs>`_.  This package
+serves as the Python implementation of that specification. The general strategy
+for the implementation is to dress standard Python and NumPy objects with an
+``attr`` dictionary holding LGDO metadata, plus some convenience functions. The
+basic data object classes are:
 
 * :class:`.LGDO`: abstract base class for all LGDOs
 * :class:`.Scalar`: typed Python scalar. Access data via the :attr:`value`
@@ -38,12 +38,20 @@
 `h5py <https://www.h5py.org>`_.
 """
 
+from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
 from .types import (
-    Array, ArrayOfEqualSizedArrays, ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors,
-    FixedSizeArray, LGDO, Scalar, Struct, Table, VectorOfVectors, WaveformTable
+    LGDO,
+    Array,
+    ArrayOfEncodedEqualSizedArrays,
+    ArrayOfEqualSizedArrays,
+    FixedSizeArray,
+    Scalar,
+    Struct,
+    Table,
+    VectorOfEncodedVectors,
+    VectorOfVectors,
+    WaveformTable,
 )
-from .lgdo_utils import copy
-from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
 
 __all__ = [
     "Array",
diff --git a/src/lgdo/compression/__init__.py b/src/lgdo/compression/__init__.py
index 44216e5c..eeac018d 100644
--- a/src/lgdo/compression/__init__.py
+++ b/src/lgdo/compression/__init__.py
@@ -1,6 +1,6 @@
 r"""Data compression utilities.
 
-This subpackage collects all *pygama* custom data compression (encoding) and
+This subpackage collects all LEGEND custom data compression (encoding) and
 decompression (decoding) algorithms.
 
 Available lossless waveform compression algorithms:
@@ -16,7 +16,7 @@
 :func:`~.generic.encode` and :func:`~.generic.decode` provide a high-level
 interface for encoding/decoding :class:`~.lgdo.LGDO`\ s.
 
->>> from pygama.lgdo import WaveformTable, compression
+>>> from lgdo import WaveformTable, compression
 >>> wftbl = WaveformTable(...)
 >>> enc_wft = compression.encode(wftable, RadwareSigcompress(codec_shift=-23768)
 >>> compression.decode(enc_wft) # == wftbl
diff --git a/src/lgdo/compression/radware.py b/src/lgdo/compression/radware.py
index 96aa045c..1a49cd93 100644
--- a/src/lgdo/compression/radware.py
+++ b/src/lgdo/compression/radware.py
@@ -8,7 +8,6 @@
 from numpy.typing import NDArray
 
 from .. import types as lgdo
-
 from .base import WaveformCodec
 
 # fmt: off
@@ -23,7 +22,7 @@ class RadwareSigcompress(WaveformCodec):
 
     Examples
     --------
-    >>> from pygama.lgdo.compression import RadwareSigcompress
+    >>> from lgdo.compression import RadwareSigcompress
     >>> codec = RadwareSigcompress(codec_shift=-32768)
     """
 
diff --git a/src/lgdo/compression/varlen.py b/src/lgdo/compression/varlen.py
index 7ef2a078..f27ed8a2 100644
--- a/src/lgdo/compression/varlen.py
+++ b/src/lgdo/compression/varlen.py
@@ -11,7 +11,6 @@
 from numpy.typing import NDArray
 
 from .. import types as lgdo
-
 from .base import WaveformCodec
 
 log = logging.getLogger(__name__)
diff --git a/src/lgdo/lgdo_utils.py b/src/lgdo/lgdo_utils.py
index 02b1e878..b9112c8c 100644
--- a/src/lgdo/lgdo_utils.py
+++ b/src/lgdo/lgdo_utils.py
@@ -101,7 +101,7 @@ def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str]
         the datatype name dims if not ``None``, a tuple of dimensions for the
         LGDO. Note this is not the same as the NumPy shape of the underlying
         data object. See the LGDO specification for more information. Also see
-        :class:`.ArrayOfEqualSizedArrays` and
+        :class:`~.types.ArrayOfEqualSizedArrays` and
         :meth:`.lh5_store.LH5Store.read_object` for example code elements for
         numeric objects, the element type for struct-like  objects, the list of
         fields in the struct.
diff --git a/src/lgdo/lh5_store.py b/src/lgdo/lh5_store.py
index 38e20af0..5ceac736 100644
--- a/src/lgdo/lh5_store.py
+++ b/src/lgdo/lh5_store.py
@@ -20,20 +20,20 @@
 
 from . import compression as compress
 from .compression import WaveformCodec
+from .lgdo_utils import expand_path, parse_datatype
 from .types import (
     Array,
+    ArrayOfEncodedEqualSizedArrays,
     ArrayOfEqualSizedArrays,
-    ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors,
     FixedSizeArray,
     Scalar,
     Struct,
     Table,
+    VectorOfEncodedVectors,
     VectorOfVectors,
     WaveformTable,
 )
 
-from .lgdo_utils import expand_path, parse_datatype
-
 LGDO = Union[Array, Scalar, Struct, VectorOfVectors]
 
 log = logging.getLogger(__name__)
@@ -48,11 +48,11 @@ class LH5Store:
 
     Examples
     --------
-    >>> from pygama.lgdo import LH5Store
+    >>> from lgdo import LH5Store
     >>> store = LH5Store()
     >>> obj, _ = store.read_object("/geds/waveform", "file.lh5")
     >>> type(obj)
-    pygama.lgdo.waveform_table.WaveformTable
+    lgdo.types.waveform_table.WaveformTable
     """
 
     def __init__(self, base_path: str = "", keep_open: bool = False) -> None:
@@ -219,7 +219,7 @@ def read_object(
             Start location in ``obj_buf`` for read. For concatenating data to
             array-like objects.
         decompress
-            Decompress data encoded with pygama's compression routines right
+            Decompress data encoded with LGDO's compression routines right
             after reading. The option has no effect on data encoded with HDF5
             built-in filters, which is always decompressed upstream by HDF5.
 
@@ -919,7 +919,7 @@ def write_object(
                     del group[key]
 
             for field in obj.keys():
-                # eventually compress waveform table values with pygama's
+                # eventually compress waveform table values with LGDO's
                 # custom codecs before writing
                 # if waveformtable.values.attrs["compression"] is a string,
                 # interpret it as an HDF5 built-in filter
@@ -1245,7 +1245,7 @@ def show(
 
     Examples
     --------
-    >>> from pygama.lgdo import show
+    >>> from lgdo import show
     >>> show("file.lh5", "/geds/raw")
     /geds/raw
     ├── channel · array<1>{real}
diff --git a/src/lgdo/types/__init__.py b/src/lgdo/types/__init__.py
index 58420883..57a18e0a 100644
--- a/src/lgdo/types/__init__.py
+++ b/src/lgdo/types/__init__.py
@@ -1,42 +1,4 @@
-"""
-Pygama works with "LEGEND Data Objects" (LGDO) defined in the `LEGEND data
-format specification <https://github.com/legend-exp/legend-data-format-specs>`_.
-This subpackage serves as the Python implementation of that specification. The
-general strategy for the implementation is to dress standard Python and NumPy
-objects with an ``attr`` dictionary holding LGDO metadata, plus some convenience
-functions. The basic data object classes are:
-
-* :class:`.LGDO`: abstract base class for all LGDOs
-* :class:`.Scalar`: typed Python scalar. Access data via the :attr:`value`
-  attribute
-* :class:`.Array`: basic :class:`numpy.ndarray`. Access data via the
-  :attr:`nda` attribute.
-* :class:`.FixedSizeArray`: basic :class:`numpy.ndarray`. Access data via the
-  :attr:`nda` attribute.
-* :class:`.ArrayOfEqualSizedArrays`: multi-dimensional :class:`numpy.ndarray`.
-  Access data via the :attr:`nda` attribute.
-* :class:`.VectorOfVectors`: a variable length array of variable length arrays.
-  Implemented as a pair of :class:`.Array`: :attr:`flattened_data` holding the
-  raw data, and :attr:`cumulative_length` whose ith element is the sum of the
-  lengths of the vectors with ``index <= i``
-* :class:`.VectorOfEncodedVectors`: an array of variable length *encoded*
-  arrays. Implemented as a :class:`.VectorOfVectors` :attr:`encoded_data`
-  holding the encoded vectors and an :class:`.Array` :attr:`decoded_size`
-  specifying the size of each decoded vector. Mainly used to represent a list
-  of compressed waveforms.
-* :class:`.ArrayOfEncodedEqualSizedArrays`: an array of equal sized encoded
-  arrays. Similar to :class:`.VectorOfEncodedVectors` except for
-  :attr:`decoded_size`, which is now a scalar.
-* :class:`.Struct`: a dictionary containing LGDO objects. Derives from
-  :class:`dict`
-* :class:`.Table`: a :class:`.Struct` whose elements ("columns") are all array
-  types with the same length (number of rows)
-
-Currently the primary on-disk format for LGDO object is LEGEND HDF5 (LH5) files. IO
-is done via the class :class:`.lh5_store.LH5Store`. LH5 files can also be
-browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
-`h5py <https://www.h5py.org>`_.
-"""
+"""LEGEND Data Objects (LGDO) types."""
 
 from .array import Array
 from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
diff --git a/src/lgdo/types/encoded.py b/src/lgdo/types/encoded.py
index f99d93d5..68886273 100644
--- a/src/lgdo/types/encoded.py
+++ b/src/lgdo/types/encoded.py
@@ -6,9 +6,9 @@
 import numpy as np
 from numpy.typing import NDArray
 
+from .. import lgdo_utils as utils
 from .array import Array
 from .lgdo import LGDO
-from ..lgdo_utils import get_element_type
 from .scalar import Scalar
 from .vectorofvectors import VectorOfVectors
 
@@ -71,7 +71,7 @@ def datatype_name(self) -> str:
         return "array"
 
     def form_datatype(self) -> str:
-        et = get_element_type(self.encoded_data)
+        et = utils.get_element_type(self.encoded_data)
         return "array<1>{encoded_array<1>{" + et + "}}"
 
     def __len__(self) -> int:
@@ -276,7 +276,7 @@ def datatype_name(self) -> str:
         return "array"
 
     def form_datatype(self) -> str:
-        et = get_element_type(self.encoded_data)
+        et = utils.get_element_type(self.encoded_data)
         return "array_of_encoded_equalsized_arrays<1,1>{" + et + "}"
 
     def __len__(self) -> int:
diff --git a/src/lgdo/types/fixedsizearray.py b/src/lgdo/types/fixedsizearray.py
index c8d70185..89eae3e5 100644
--- a/src/lgdo/types/fixedsizearray.py
+++ b/src/lgdo/types/fixedsizearray.py
@@ -19,7 +19,7 @@ class FixedSizeArray(Array):
     application to application.  This data type is used for optimized memory
     handling on some platforms. We are not that sophisticated so we are just
     storing this identification for LGDO validity, i.e. for now this class is
-    just an alias for :class:`~.Array`, but keeps track of the datatype name.
+    just an alias for :class:`.Array`, but keeps track of the datatype name.
     """
 
     def __init__(
diff --git a/src/lgdo/types/scalar.py b/src/lgdo/types/scalar.py
index e1ee5f88..86630899 100644
--- a/src/lgdo/types/scalar.py
+++ b/src/lgdo/types/scalar.py
@@ -7,8 +7,8 @@
 
 import numpy as np
 
+from .. import lgdo_utils as utils
 from .lgdo import LGDO
-from ..lgdo_utils import get_element_type
 
 log = logging.getLogger(__name__)
 
@@ -37,7 +37,7 @@ def datatype_name(self) -> str:
         if hasattr(self.value, "datatype_name"):
             return self.value.datatype_name
         else:
-            return get_element_type(self.value)
+            return utils.get_element_type(self.value)
 
     def form_datatype(self) -> str:
         return self.datatype_name()
diff --git a/src/lgdo/types/struct.py b/src/lgdo/types/struct.py
index 7fa2373f..c3f32711 100644
--- a/src/lgdo/types/struct.py
+++ b/src/lgdo/types/struct.py
@@ -62,7 +62,7 @@ def remove_field(self, name: str | int, delete: bool = False) -> None:
         Parameters
         ----------
         name
-            name of the field to be removed
+            name of the field to be removed.
         delete
             if ``True``, delete the field object by calling :any:`del`.
         """
diff --git a/src/lgdo/types/table.py b/src/lgdo/types/table.py
index a5985dff..c321bfcc 100644
--- a/src/lgdo/types/table.py
+++ b/src/lgdo/types/table.py
@@ -48,7 +48,7 @@ def __init__(
         Parameters
         ----------
         size
-            sets the number of rows in the table. :class:`~.Array`\ s in
+            sets the number of rows in the table. :class:`.Array`\ s in
             `col_dict will be resized to match size if both are not ``None``.
             If `size` is left as ``None``, the number of table rows is
             determined from the length of the first array in `col_dict`. If
@@ -263,7 +263,8 @@ def eval(self, expr_config: dict) -> Table:
             - ``expression`` is an expression string supported by
               :meth:`numexpr.evaluate` (see also `here
               <https://numexpr.readthedocs.io/projects/NumExpr3/en/latest/index.html>`_
-              for documentation). Note: because of internal limitations, reduction operations must appear the last in the stack.
+              for documentation). Note: because of internal limitations,
+              reduction operations must appear last in the stack.
             - ``parameters`` is a dictionary of function parameters. Passed to
               :meth:`numexpr.evaluate`` as `local_dict` argument.
 
diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py
index 99269fad..7d227a52 100644
--- a/src/lgdo/types/vectorofvectors.py
+++ b/src/lgdo/types/vectorofvectors.py
@@ -13,8 +13,8 @@
 import numpy as np
 from numpy.typing import DTypeLike, NDArray
 
-from . import arrayofequalsizedarrays as aoesa
 from .. import lgdo_utils as utils
+from . import arrayofequalsizedarrays as aoesa
 from .array import Array
 from .lgdo import LGDO
 
diff --git a/src/lgdo/types/waveform_table.py b/src/lgdo/types/waveform_table.py
index 6cf948e1..f444c727 100644
--- a/src/lgdo/types/waveform_table.py
+++ b/src/lgdo/types/waveform_table.py
@@ -22,7 +22,7 @@
 class WaveformTable(Table):
     r"""An LGDO for storing blocks of (1D) time-series data.
 
-    A :class:`WaveformTable` is an LGDO :class:`~.lgdo.table.Table` with the 3
+    A :class:`WaveformTable` is an LGDO :class:`.Table` with the 3
     columns ``t0``, ``dt``, and ``values``:
 
     * ``t0[i]`` is a time offset (relative to a user-defined global reference)
diff --git a/tests/compression/test_radware_sigcompress.py b/tests/compression/test_radware_sigcompress.py
index 45874281..09a01cb9 100644
--- a/tests/compression/test_radware_sigcompress.py
+++ b/tests/compression/test_radware_sigcompress.py
@@ -2,11 +2,7 @@
 
 import numpy as np
 
-from lgdo import (
-    ArrayOfEncodedEqualSizedArrays,
-    ArrayOfEqualSizedArrays,
-    LH5Store,
-)
+from lgdo import ArrayOfEncodedEqualSizedArrays, ArrayOfEqualSizedArrays, LH5Store
 from lgdo.compression.radware import (
     _get_hton_u16,
     _radware_sigcompress_decode,
diff --git a/tests/conftest.py b/tests/conftest.py
index 45c64a32..cb3fec4e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,7 +1,4 @@
-import copy
-import inspect
 import os
-import re
 import shutil
 import uuid
 from getpass import getuser
diff --git a/tests/types/test_array.py b/tests/types/test_array.py
index 2fd6690b..0932c99b 100644
--- a/tests/types/test_array.py
+++ b/tests/types/test_array.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-import lgdo
+import lgdo.lgdo_utils as utils
 from lgdo import Array
 
 
@@ -29,7 +29,7 @@ def test_resize():
 
 def test_copy():
     a1 = Array(np.array([1, 2, 3, 4]))
-    a2 = lgdo.copy(a1)
+    a2 = utils.copy(a1)
     assert a1 == a2
 
 
diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py
index 76c1802c..4126d119 100644
--- a/tests/types/test_vectorofvectors.py
+++ b/tests/types/test_vectorofvectors.py
@@ -2,6 +2,7 @@
 import pytest
 
 import lgdo
+import lgdo.lgdo_utils as utils
 from lgdo import VectorOfVectors
 from lgdo.types import vectorofvectors as vov
 
@@ -180,7 +181,7 @@ def test_insert(lgdo_vov):
 
 
 def test_replace(lgdo_vov):
-    v = lgdo.copy(lgdo_vov)
+    v = utils.copy(lgdo_vov)
     v.replace(1, np.zeros(3))
     assert v == VectorOfVectors(
         [
@@ -192,7 +193,7 @@ def test_replace(lgdo_vov):
         ]
     )
 
-    v = lgdo.copy(lgdo_vov)
+    v = utils.copy(lgdo_vov)
     v.replace(1, np.zeros(2))
     assert v == VectorOfVectors(
         [
@@ -204,7 +205,7 @@ def test_replace(lgdo_vov):
         ]
     )
 
-    v = lgdo.copy(lgdo_vov)
+    v = utils.copy(lgdo_vov)
     v.replace(1, np.zeros(4))
     assert v == VectorOfVectors(
         [
@@ -268,4 +269,4 @@ def test_build_cl_and_explodes():
 
 
 def test_copy(lgdo_vov):
-    assert lgdo_vov == lgdo.copy(lgdo_vov)
+    assert lgdo_vov == utils.copy(lgdo_vov)