From 014ced4833a6e25ea1b880a058994c2726208d59 Mon Sep 17 00:00:00 2001
From: Will Barnes <will.t.barnes@gmail.com>
Date: Wed, 6 Nov 2024 17:29:30 -0500
Subject: [PATCH] move all file parsing to external functions, return tables
 instead of arrays

---
 pydrad/parse/parse.py             | 311 ++++++++++++++----------------
 pydrad/parse/tests/test_strand.py |  18 +-
 pydrad/parse/util.py              | 222 +++++++++++++++++++++
 3 files changed, 377 insertions(+), 174 deletions(-)
 create mode 100644 pydrad/parse/util.py

diff --git a/pydrad/parse/parse.py b/pydrad/parse/parse.py
index 98df8e2..09f8f97 100644
--- a/pydrad/parse/parse.py
+++ b/pydrad/parse/parse.py
@@ -1,20 +1,19 @@
 """
 Interface for easily parsing HYDRAD results
 """
-import glob
-import os
 import pathlib
 
+import astropy.constants as const
 import astropy.units as u
 import h5py
 import matplotlib.pyplot as plt
 import numpy as np
-import plasmapy.particles
-from pandas import read_csv
 from scipy.interpolate import splev, splrep
 
 from pydrad import log
 from pydrad.configure import Configure
+from pydrad.parse.util import (read_amr_file, read_hstate_file, read_ine_file,
+                               read_phy_file, read_trm_file)
 from pydrad.visualize import (animate_strand, plot_histogram, plot_profile,
                               plot_strand, plot_time_distance, plot_time_mesh)
 
@@ -172,10 +171,15 @@ def initial_conditions(self):
         `~pydrad.parse.Profile` for the solutions to the hydrostatic
         equations used as initial conditions.
         """
+        profile_kwargs = self._profile_kwargs.copy()
+        # NOTE: These files do not exist for the initial conditions
+        profile_kwargs['read_hstate'] = False
+        profile_kwargs['read_ine'] = False
+        profile_kwargs['read_trm'] = False
         return InitialProfile(self.hydrad_root,
                               0*u.s,
                               master_time=[0]*u.s,
-                              **self._profile_kwargs)
+                              **profile_kwargs)
 
     def peek(self, **kwargs):
         """
@@ -280,7 +284,7 @@ class Profile(object):
 
     Parameters
     ----------
-    hydrad_root : `str`
+    hydrad_root : path-like
         Path to HYDRAD directory
     time : `~astropy.units.Quantity`
         Timestep corresponding to the profile of interest
@@ -297,12 +301,13 @@ def __init__(self, hydrad_root, time: u.s, **kwargs):
             self._master_time = get_master_time(self.hydrad_root,
                                                 read_from_cfg=kwargs.get('read_from_cfg', False))
         # Read results files
-        self._read_phy()
         self._read_amr()
-        if kwargs.get('read_hstate', True):
-            self._read_hstate()
+        if kwargs.get('read_phy', True):
+            self._read_phy()
         if kwargs.get('read_ine', True):
             self._read_ine()
+        if kwargs.get('read_hstate', True):
+            self._read_hstate()
         if kwargs.get('read_trm', True):
             self._read_trm()
 
@@ -336,162 +341,58 @@ def __repr__(self):
 Filename: {self._phy_filename}
 Timestep #: {self._index}"""
 
-    def _read_phy(self):
+    def _read_amr(self):
         """
-        Parse the hydrodynamics results file
+        Parse the adaptive mesh grid file
         """
-        self._phy_data = np.loadtxt(self._phy_filename)
+        self._amr_data = read_amr_file(self._amr_filename)
 
-    def _read_amr(self):
+    def _read_phy(self):
         """
-        Parse the adaptive mesh grid file
+        Parse the hydrodynamics results file
         """
-        # TODO: Read other quantities from .amr file
-        with self._amr_filename.open() as f:
-            lines = f.readlines()
-            self._grid_centers = np.zeros((int(lines[3]),))
-            self._grid_widths = np.zeros((int(lines[3]),))
-            for i, l in enumerate(lines[4:]):
-                tmp = np.array(l.split(), dtype=float)
-                self._grid_centers[i] = tmp[0]
-                self._grid_widths[i] = tmp[1]
+        if not self._phy_filename.is_file():
+            log.warning(f'{self._phy_filename} not found. Skipping parsing of phy files. Set read_phy=False to suppress this warning.')
+            return
+        self._phy_data = read_phy_file(self._phy_filename)
+        # NOTE: only adding three columns in this manner as the remaining columns are dealt with explicitly
+        # below because of the overlap with the derived quantities from the .amr files.
+        for column in ['sound_speed', 'electron_heat_flux', 'ion_heat_flux']:
+            _add_property(column, '_phy_data')
 
     def _read_trm(self):
         """
         Parse the equation terms file
-
-        The files come in sets of 5 rows with variable number of columns:
-            -- Loop coordinate (1 column), and at each position:
-            -- Terms of mass equation (2 columns)
-            -- Terms of momentum equation (6 columns)
-            -- Terms of electron energy equation (11 columns)
-            -- Terms of hydrogen energy equation (11 columns)
-        """
-        units = {
-            'mass': 'g cm-3 s-1',
-            'momentum': 'dyne cm-3 s-1',
-            'electrons': 'erg cm-3 s-1',
-            'hydrogen': 'erg cm-3 s-1',
-        }
-        columns = {
-            'mass': [
-                'mass_drhobydt',
-                'mass_advection',
-            ],
-            'momentum': [
-                'momentum_drho_vbydt',
-                'momentum_advection',
-                'momentum_pressure_gradient',
-                'momentum_gravity',
-                'momentum_viscous_stress',
-                'momentum_numerical_viscosity',
-            ],
-            'electrons': [
-                'electron_dTEKEbydt',
-                'electron_enthalpy',
-                'electron_conduction',
-                'electron_gravity',
-                'electron_collisions',
-                'electron_heating',
-                'electron_radiative_loss',
-                'electron_electric_field',
-                'electron_viscous_stress',
-                'electron_numerical_viscosity',
-                'electron_ionization'
-            ],
-            'hydrogen': [
-                'hydrogen_dTEKEbydt',
-                'hydrogen_enthalpy',
-                'hydrogen_conduction',
-                'hydrogen_gravity',
-                'hydrogen_collisions',
-                'hydrogen_heating',
-                'hydrogen_radiative_loss',
-                'hydrogen_electric_field',
-                'hydrogen_viscous_stress',
-                'hydrogen_numerical_viscosity',
-                'hydrogen_ionization',
-            ]
-        }
-        offsets = {
-            'mass': 0,
-            'momentum': len(columns['mass']),
-            'electron': len(columns['mass'])+len(columns['momentum']),
-            'hydrogen': len(columns['mass'])+len(columns['momentum'])+len(columns['electron']),
-        }
-        # Read data from file into table per term category
-        terms = {}
-        for i,(k,v) in enumerate(columns.items()):
-            terms[k] = read_csv(self._trm_filename,
-                                sep='\t',
-                                header=None,
-                                names=v,
-                                skiprows=lambda x: x % 5 != (i+1))
-        # Read into single matrix
-        n_elements = len(terms['mass'])
-        self._trm_data = np.zeros([n_elements, offsets['hydrogen']+len(columns['hydrogen'])])
-        for i in range(n_elements):
-            for k,v in columns.items():
-                for j in range(len(v)):
-                    self._trm_data[i,j+offsets[k]] = terms[k][v[j]][i]
-        # Add individual property for each term
-        for k,v in columns.items():
-            for i in range(len(v)):
-                add_property(v[i], '_trm_data', i+offsets[k], units[k])
+        """
+        if not self._trm_filename.is_file():
+            log.warning(f'{self._trm_filename} not found. Skipping parsing of trm files. Set read_trm=False to suppress this warning.')
+            return
+        self._trm_data = read_trm_file(self._trm_filename)
+        for col in self._trm_data.colnames:
+            _add_property(col, '_trm_data')
 
     def _read_ine(self):
         """
         Parse non-equilibrium ionization population fraction files
         and set attributes for relevant quantities
         """
-        # TODO: clean this up somehow? I've purposefully included
-        # a lot of comments because the format of this file makes
-        # the parsing code quite opaque
-        try:
-            with self._ine_filename.open() as f:
-                lines = f.readlines()
-        except FileNotFoundError:
-            log.debug(f'{self._ine_filename} not found')
+        if not self._ine_filename.is_file():
+            log.warning(f'{self._ine_filename} not found. Skipping parsing of ine files. Set read_ine=False to suppress this warning.')
             return
-        # First parse all of the population fraction arrays
-        n_s = self.coordinate.shape[0]
-        # NOTE: Have to calculate the number of elements we have
-        # computed population fractions for as we do not necessarily
-        # know this ahead of time
-        n_e = int(len(lines)/n_s - 1)
-        # The file is arranged in n_s groups of n_e+1 lines each where the first
-        # line is the coordinate and the subsequent lines are the population fraction
-        # for each element, with each column corresponding to an ion of that element
-        # First, separate by coordinate
-        pop_lists = [lines[i*(n_e+1)+1:(i+1)*(n_e+1)] for i in range(n_s)]
-        # Convert each row of each group into a floating point array
-        pop_lists = [[np.array(l.split(), dtype=float) for l in p] for p in pop_lists]
-        # NOTE: each row has Z+2 entries as the first entry is the atomic number Z
-        # Get these from just the first group as the number of elements is the same
-        # for each
-        Z = np.array([p[0] for p in pop_lists[0]], dtype=int)
-        pop_arrays = [np.zeros((n_s, z+1))for z in Z]
-        for i, p in enumerate(pop_lists):
-            for j, l in enumerate(p):
-                pop_arrays[j][i, :] = l[1:]  # Skip first entry, it is the atomic number
-
-        # Then set attributes for each ion of each element
-        for z, p in zip(Z, pop_arrays):
-            name = plasmapy.particles.element_name(z)
-            attr = f'_population_fraction_{name}'
-            setattr(self, attr, p)
-            for p in [(f'{attr[1:]}_{i+1}', attr, i, '') for i in range(z+1)]:
-                add_property(*p)
+        self._ine_data = read_ine_file(self._ine_filename, self.coordinate.shape[0])
+        for col in self._ine_data.colnames:
+            _add_property(col, '_ine_data')
 
     def _read_hstate(self):
         """
         Parse the hydrogen energy level populations file
         """
-        try:
-            self._hstate_data = np.loadtxt(self._hstate_filename)
-        except OSError:
-            log.debug(f'{self._hstate_filename} not found')
-            self._hstate_data = None
+        if not self._hstate_filename.is_file():
+            log.warning(f'{self._hstate_filename} not found. Skipping parsing of hstate files. Set read_hstate=False to suppress this warning.')
+            return
+        self._hstate_data = read_hstate_file(self._hstate_filename)
+        for col in self._hstate_data.colnames:
+            _add_property(col, '_hstate_data')
 
     @property
     @u.quantity_input
@@ -499,7 +400,7 @@ def grid_centers(self) -> u.cm:
         """
         Spatial location of the grid centers
         """
-        return u.Quantity(self._grid_centers, 'cm')
+        return self._amr_data['grid_centers']
 
     @property
     @u.quantity_input
@@ -507,7 +408,7 @@ def grid_widths(self) -> u.cm:
         """
         Spatial width of each grid cell
         """
-        return u.Quantity(self._grid_widths, 'cm')
+        return self._amr_data['grid_widths']
 
     @property
     @u.quantity_input
@@ -535,6 +436,102 @@ def grid_edges_right(self) -> u.cm:
         """
         return self.grid_edges[1:]
 
+    @property
+    @u.quantity_input
+    def coordinate(self) -> u.cm:
+        """
+        Coordinate in the field-aligned direction
+        """
+        return self.grid_centers
+
+    @property
+    @u.quantity_input
+    def electron_mass_density(self) -> u.Unit('g cm-3'):
+        # TODO: Account for possible presence of both electron
+        # and ion mass densities. If the electron mass density
+        # is present in this file, it will shift all of the
+        # indices in the .amr file by one.
+        if 'electron_mass_density' in self._amr_data.colnames:
+            return self._amr_data['electron_mass_density']
+        return self.ion_mass_density
+
+    @property
+    @u.quantity_input
+    def ion_mass_density(self) -> u.Unit('g cm-3'):
+        return self._amr_data['ion_mass_density']
+
+    @property
+    @u.quantity_input
+    def momentum_density(self) -> u.Unit('g cm-2 s-1'):
+        return self._amr_data['momentum_density']
+
+    @property
+    @u.quantity_input
+    def electron_energy_density(self) -> u.Unit('erg cm-3'):
+        return self._amr_data['electron_energy_density']
+
+    @property
+    @u.quantity_input
+    def ion_energy_density(self) -> u.Unit('erg cm-3'):
+        return self._amr_data['ion_energy_density']
+
+    @property
+    @u.quantity_input
+    def velocity(self) -> u.cm/u.s:
+        if hasattr(self, '_phy_data'):
+            return self._phy_data['velocity']
+        return self.momentum_density / self.ion_mass_density
+
+    @property
+    @u.quantity_input
+    def ion_density(self) -> u.Unit('cm-3'):
+        if hasattr(self, '_phy_data'):
+            return self._phy_data['ion_density']
+        # NOTE: This is pulled directly from the HYDRAD source code
+        # https://github.com/rice-solar-physics/HYDRAD/blob/master/Resources/source/constants.h
+        # and is the average ion mass for a H-He plasma computed by weighting the H and He ion
+        # masses with a particular set of abundances.
+        m_ion = 2.171e-24 * u.g
+        return self.ion_mass_density / m_ion
+
+    @property
+    @u.quantity_input
+    def electron_density(self) -> u.Unit('cm-3'):
+        if hasattr(self, '_phy_data'):
+            return self._phy_data['electron_density']
+        # FIXME: If this exists as a separate column in the .amr file then
+        return self.ion_density
+
+    @property
+    @u.quantity_input
+    def electron_pressure(self) -> u.Unit('dyne cm-2'):
+        if hasattr(self, '_phy_data'):
+            return self._phy_data['electron_pressure']
+        gamma = 5/3
+        return self.electron_energy_density / (gamma - 1)
+
+    @property
+    @u.quantity_input
+    def ion_pressure(self) -> u.Unit('dyne cm-2'):
+        if hasattr(self, '_phy_data'):
+            return self._phy_data['ion_pressure']
+        gamma = 5/3
+        return (self.ion_energy_density - self.momentum_density**2/(2*self.ion_mass_density))/(gamma - 1)
+
+    @property
+    @u.quantity_input
+    def electron_temperature(self) -> u.Unit('K'):
+        if hasattr(self, '_phy_data'):
+            return self._phy_data['electron_temperature']
+        return self.electron_pressure / (const.k_B*self.electron_density)
+
+    @property
+    @u.quantity_input
+    def ion_temperature(self) -> u.Unit('K'):
+        if hasattr(self, '_phy_data'):
+            return self._phy_data['ion_temperature']
+        return self.ion_pressure / (const.k_B*self.ion_density)
+
     def spatial_average(self, quantity, bounds=None):
         """
         Compute a spatial average of a specific quantity
@@ -626,7 +623,7 @@ def _phy_filename(self):
         return self.hydrad_root / 'Initial_Conditions' / 'profiles' / 'initial.amr.phy'
 
 
-def add_property(name, attr, index, unit):
+def _add_property(name, attr,):
     """
     Auto-generate properties for various pieces of data
     """
@@ -634,25 +631,7 @@ def property_template(self):
         data = getattr(self, attr)
         if data is None:
             raise ValueError(f'No data available for {name}')
-        return u.Quantity(data[:, index], unit)
+        return u.Quantity(data[name])
     property_template.__doc__ = f'{" ".join(name.split("_")).capitalize()} as a function of :math:`s`'
     property_template.__name__ = name
     setattr(Profile, property_template.__name__, property(property_template))
-
-
-properties = [
-    ('coordinate', '_phy_data', 0, 'cm'),
-    ('velocity', '_phy_data', 1, 'cm / s'),
-    ('sound_speed', '_phy_data', 2, 'cm / s'),
-    ('electron_density', '_phy_data', 3, 'cm-3'),
-    ('ion_density', '_phy_data', 4, 'cm-3'),
-    ('electron_pressure', '_phy_data', 5, 'dyne cm-2'),
-    ('ion_pressure', '_phy_data', 6, 'dyne cm-2'),
-    ('electron_temperature', '_phy_data', 7, 'K'),
-    ('ion_temperature', '_phy_data', 8, 'K'),
-    ('electron_heat_flux', '_phy_data', 9, 'erg s-1 cm-2'),
-    ('ion_heat_flux', '_phy_data', 10, 'erg s-1 cm-2'),
-]
-properties += [(f'level_population_hydrogen_{i}', '_hstate_data', i, '') for i in range(1, 7)]
-for p in properties:
-    add_property(*p)
diff --git a/pydrad/parse/tests/test_strand.py b/pydrad/parse/tests/test_strand.py
index a6de280..79c1ed9 100644
--- a/pydrad/parse/tests/test_strand.py
+++ b/pydrad/parse/tests/test_strand.py
@@ -96,15 +96,17 @@ def test_emission_measure(strand):
 def test_term_file_output(strand):
     for p in strand:
         # The electron energy equation's numerical viscosity term is always 0:
-        assert u.allclose(p.electron_numerical_viscosity,
-                        np.zeros_like(p.electron_numerical_viscosity),
-                        rtol=0.0,
-                        )
+        assert u.allclose(
+            p.electron_numerical_viscosity,
+            np.zeros_like(p.electron_numerical_viscosity),
+            rtol=0.0,
+        )
         # The hydrogen energy equation's collision rate is never 0 at all positions:
-        assert not u.allclose(p.hydrogen_collisions,
-                            np.zeros_like(p.hydrogen_collisions),
-                            rtol=0.0,
-                        )
+        assert not u.allclose(
+            p.hydrogen_collisions,
+            np.zeros_like(p.hydrogen_collisions),
+            rtol=0.0,
+        )
 
 def test_term_file_units(strand):
     assert strand[0].mass_advection.unit == u.Unit('g s-1 cm-3')
diff --git a/pydrad/parse/util.py b/pydrad/parse/util.py
new file mode 100644
index 0000000..c86ae30
--- /dev/null
+++ b/pydrad/parse/util.py
@@ -0,0 +1,222 @@
+"""
+Utilities related to parsing HYDRAD results
+"""
+import pathlib
+
+import astropy.table
+import numpy as np
+import plasmapy.particles
+from pandas import read_csv
+
+__all__ = [
+    'read_amr_file',
+    'read_phy_file',
+    'read_ine_file',
+    'read_trm_file',
+    'read_hstate_file',
+]
+
+
+def read_amr_file(filename):
+    """
+    Parse ``.amr`` files containing grid parameters and conserved quantities as
+    a function of position
+    """
+    # TODO: Account for possible presence of both electron
+    # and ion mass densities. If the electron mass density
+    # is present in this file, it will shift all of the
+    # columns from the index=2 position to the right.
+    columns = [
+        'grid_centers',
+        'grid_widths',
+        'ion_mass_density',
+        'momentum_density',
+        'electron_energy_density',
+        'ion_energy_density',
+    ]
+    # FIXME: Get actual names for these additional columns
+    columns += [f'col{i}' for i in range(6,19)]
+    units = {
+        'grid_centers': 'cm',
+        'grid_widths': 'cm',
+        'ion_mass_density': 'g cm-3',
+        'momentum_density': 'g cm-2 s-1',
+        'electron_energy_density': 'erg cm-3',
+        'ion_energy_density': 'erg cm-3',
+    }
+    return astropy.table.QTable.read(
+        filename,
+        format='ascii',
+        data_start=4,
+        names=columns,
+        units=units,
+    )
+
+
+def read_phy_file(filename):
+    """
+    Parse ``.phy`` files containing thermodynamic quantities as a function of position.
+    """
+    columns = [
+        'coordinate',
+        'velocity',
+        'sound_speed',
+        'electron_density',
+        'ion_density',
+        'electron_pressure',
+        'ion_pressure',
+        'electron_temperature',
+        'ion_temperature',
+        'electron_heat_flux',
+        'ion_heat_flux',
+    ]
+    units = {
+        'coordinate': 'cm',
+        'velocity': 'cm / s',
+        'sound_speed': 'cm / s',
+        'electron_density': 'cm-3',
+        'ion_density': 'cm-3',
+        'electron_pressure': 'dyne cm-2',
+        'ion_pressure': 'dyne cm-2',
+        'electron_temperature': 'K',
+        'ion_temperature': 'K',
+        'electron_heat_flux': 'erg s-1 cm-2',
+        'ion_heat_flux': 'erg s-1 cm-2',
+    }
+    return astropy.table.QTable.read(
+        filename,
+        format='ascii',
+        names=columns,
+        units=units,
+    )
+
+
+def read_ine_file(filename, n_s):
+    """
+    Parse ``.ine`` files containing ionization fraction as a function of position
+
+    .. note:: This is not intended for direct use. Use the `pydrad.parse.Strand`
+              object instead.
+
+    Parameters
+    ----------
+    filename: path-like
+    n_s: `int`
+    """
+    # TODO: clean this up somehow? I've purposefully included
+    # a lot of comments because the format of this file makes
+    # the parsing code quite opaque
+    with pathlib.Path(filename).open() as f:
+        lines = f.readlines()
+    # First parse all of the population fraction arrays
+    # NOTE: Have to calculate the number of elements we have
+    # computed population fractions for as we do not necessarily
+    # know this ahead of time
+    n_e = int(len(lines)/n_s - 1)
+    # The file is arranged in n_s groups of n_e+1 lines each where the first
+    # line is the coordinate and the subsequent lines are the population fraction
+    # for each element, with each column corresponding to an ion of that element
+    # First, separate by coordinate
+    pop_lists = [lines[i*(n_e+1)+1:(i+1)*(n_e+1)] for i in range(n_s)]
+    # Convert each row of each group into a floating point array
+    pop_lists = [[np.array(l.split(), dtype=float) for l in p] for p in pop_lists]
+    # NOTE: each row has Z+2 entries as the first entry is the atomic number Z
+    # Get these from just the first group as the number of elements is the same
+    # for each
+    Z = np.array([p[0] for p in pop_lists[0]], dtype=int)
+    pop_arrays = [np.zeros((n_s, z+1)) for z in Z]
+    for i, p in enumerate(pop_lists):
+        for j, line in enumerate(p):
+            pop_arrays[j][i, :] = line[1:]  # Skip first entry, it is the atomic number
+    columns = []
+    for z in Z:
+        el_name = plasmapy.particles.element_name(z)
+        columns += [f'{el_name}_{i+1}' for i in range(z+1)]
+    data = np.hstack([p for p in pop_arrays])
+    return astropy.table.QTable(data=data, names=columns)
+
+
+def read_trm_file(filename):
+    """
+    Parse ``.trm`` files with hydrodynamic equation terms as a function of position.
+
+    The files come in sets of 5 rows with variable number of columns:
+        -- Loop coordinate (1 column), and at each position:
+        -- Terms of mass equation (2 columns)
+        -- Terms of momentum equation (6 columns)
+        -- Terms of electron energy equation (11 columns)
+        -- Terms of hydrogen energy equation (11 columns)
+    """
+    units = {
+        'mass': 'g cm-3 s-1',
+        'momentum': 'dyne cm-3 s-1',
+        'electron': 'erg cm-3 s-1',
+        'hydrogen': 'erg cm-3 s-1',
+    }
+    columns = {
+        'mass': [
+            'mass_drhobydt',
+            'mass_advection',
+        ],
+        'momentum': [
+            'momentum_drho_vbydt',
+            'momentum_advection',
+            'momentum_pressure_gradient',
+            'momentum_gravity',
+            'momentum_viscous_stress',
+            'momentum_numerical_viscosity',
+        ],
+        'electron': [
+            'electron_dTEKEbydt',
+            'electron_enthalpy',
+            'electron_conduction',
+            'electron_gravity',
+            'electron_collisions',
+            'electron_heating',
+            'electron_radiative_loss',
+            'electron_electric_field',
+            'electron_viscous_stress',
+            'electron_numerical_viscosity',
+            'electron_ionization'
+        ],
+        'hydrogen': [
+            'hydrogen_dTEKEbydt',
+            'hydrogen_enthalpy',
+            'hydrogen_conduction',
+            'hydrogen_gravity',
+            'hydrogen_collisions',
+            'hydrogen_heating',
+            'hydrogen_radiative_loss',
+            'hydrogen_electric_field',
+            'hydrogen_viscous_stress',
+            'hydrogen_numerical_viscosity',
+            'hydrogen_ionization',
+        ]
+    }
+    tables = []
+    for i,(k,v) in enumerate(columns.items()):
+        tables.append(
+            astropy.table.QTable.from_pandas(
+                read_csv(
+                    filename,
+                    sep='\t',
+                    header=None,
+                    names=v,
+                    skiprows=lambda x: x % 5 != (i+1)
+                ),
+                units={c: units[k] for c in v}
+            )
+        )
+    return astropy.table.hstack(tables)
+
+
+def read_hstate_file(filename):
+    """
+    Parse the ``.hstate`` files containing the hydrogen level populations as a function of position
+    """
+    columns = [f'level_population_hydrogen_{i}' for i in range(1,7)]
+    return astropy.table.QTable.read(
+        filename,
+        format='ascii',
+        names=columns,
+    )