From c5c616904efdec8df4794f71371430b1ce6d278b Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Mon, 10 Apr 2023 13:40:33 -0500 Subject: [PATCH 01/14] Remove stale unused funcs. --- mirgecom/simutil.py | 410 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 398 insertions(+), 12 deletions(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 2a08ea8bb..553e525a6 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -17,13 +17,25 @@ .. autofunction:: max_component_norm .. autofunction:: check_naninf_local .. autofunction:: check_range_local +.. autofunction:: boundary_report Mesh and element utilities -------------------------- +.. autofunction:: geometric_mesh_partitioner .. autofunction:: generate_and_distribute_mesh .. autofunction:: get_number_of_tetrahedron_nodes +Simulation support utilities +---------------------------- + +.. autofunction:: configurate + +Lazy eval utilities +------------------- + +.. autofunction:: force_evaluation + File comparison utilities ------------------------- @@ -57,31 +69,33 @@ """ import logging import numpy as np -import grudge.op as op - -from arraycontext import map_array_container, flatten - from functools import partial +import grudge.op as op +# from grudge.op import nodal_min, elementwise_min +from arraycontext import map_array_container, flatten from meshmode.dof_array import DOFArray +from mirgecom.viscous import get_viscous_timestep from typing import List, Dict, Optional from grudge.discretization import DiscretizationCollection from grudge.dof_desc import DD_VOLUME_ALL -from mirgecom.viscous import get_viscous_timestep - +from mirgecom.utils import normalize_boundaries import pyopencl as cl logger = logging.getLogger(__name__) -def get_number_of_tetrahedron_nodes(dim, order): +def get_number_of_tetrahedron_nodes(dim, order, include_faces=False): """Get number of nodes (modes) in *dim* Tetrahedron of *order*.""" # number of {nodes, modes} see e.g.: # JSH/TW Nodal DG Methods, Section 10.1 # DOI: 10.1007/978-0-387-72067-8 - return int(np.math.factorial(dim+order) - / (np.math.factorial(dim) * np.math.factorial(order))) + nnodes = int(np.math.factorial(dim+order) + / (np.math.factorial(dim) * np.math.factorial(order))) + if include_faces: + nnodes = nnodes + (dim+1)*get_number_of_tetrahedron_nodes(dim-1, order) + return nnodes def check_step(step, interval): @@ -431,7 +445,317 @@ def max_component_norm(dcoll, fields, order=np.inf, *, dd=DD_VOLUME_ALL): componentwise_norms(dcoll, fields, order, dd=dd), actx))) -def generate_and_distribute_mesh(comm, generate_mesh): +def geometric_mesh_partitioner(mesh, num_ranks=1, *, nranks_per_axis=None, + auto_balance=False, imbalance_tolerance=.01, + debug=False): + """Partition a mesh uniformly along the X coordinate axis. + + The intent is to partition the mesh uniformly along user-specified + directions. In this current interation, the capability is demonstrated + by splitting along the X axis. + + Parameters + ---------- + mesh: :class:`meshmode.mesh.Mesh` + The serial mesh to partition + num_ranks: int + The number of partitions to make + nranks_per_axis: numpy.ndarray + How many partitions per specified axis. Currently unused. + auto_balance: bool + Indicates whether to perform automatic balancing. If true, the + partitioner will try to balance the number of elements over + the partitions. + imbalance_tolerance: float + If *auto_balance* is True, this parameter indicates the acceptable + relative difference to the average number of elements per partition. 
+ It defaults to balance within 1%. + debug: bool + En/disable debugging/diagnostic print reporting. + + Returns + ------- + elem_to_rank: numpy.ndarray + Array indicating the MPI rank for each element + """ + mesh_dimension = mesh.dim + if nranks_per_axis is None: + nranks_per_axis = np.ones(mesh_dimension) + nranks_per_axis[0] = num_ranks + if len(nranks_per_axis) != mesh_dimension: + raise ValueError("nranks_per_axis must match mesh dimension.") + nranks_test = 1 + for i in range(mesh_dimension): + nranks_test = nranks_test * nranks_per_axis[i] + if nranks_test != num_ranks: + raise ValueError("nranks_per_axis must match num_ranks.") + + mesh_groups, = mesh.groups + mesh_verts = mesh.vertices + mesh_x = mesh_verts[0] + + x_min = np.min(mesh_x) + x_max = np.max(mesh_x) + x_interval = x_max - x_min + part_loc = np.linspace(x_min, x_max, num_ranks+1) + + part_interval = x_interval / nranks_per_axis[0] + elem_x = mesh_verts[0, mesh_groups.vertex_indices] + elem_centroids = np.sum(elem_x, axis=1)/elem_x.shape[1] + global_nelements = len(elem_centroids) + aver_part_nelem = global_nelements / num_ranks + + if debug: + print(f"Partitioning {global_nelements} elements in" + f" [{x_min},{x_max}]/{num_ranks}") + print(f"Average nelem/part: {aver_part_nelem}") + print(f"Initial part locs: {part_loc=}") + + # Create geometrically even partitions + elem_to_rank = ((elem_centroids-x_min) / part_interval).astype(int) + + # map partition id to list of elements in that partition + part_to_elements = {r: set((np.where(elem_to_rank == r))[0].flat) + for r in range(num_ranks)} + # make an array of the geometrically even partition sizes + # avoids calling "len" over and over on the element sets + nelem_part = [len(part_to_elements[r]) for r in range(num_ranks)] + + if debug: + print(f"Initial: {nelem_part=}") + + # Automatic load-balancing + if auto_balance: + + for r in range(num_ranks-1): + + # find the element reservoir (next part with elements in it) + adv_part = r + 1 + while nelem_part[adv_part] == 0: + adv_part = adv_part + 1 + + num_elem_needed = aver_part_nelem - nelem_part[r] + part_imbalance = np.abs(num_elem_needed) / float(aver_part_nelem) + + if debug: + print(f"Processing part({r=})") + print(f"{part_loc[r]=}, {adv_part=}") + print(f"{num_elem_needed=}, {part_imbalance=}") + print(f"{nelem_part=}") + + niter = 0 + total_change = 0 + moved_elements = set() + + while ((part_imbalance > imbalance_tolerance) + and (adv_part < num_ranks)): + # This partition needs to keep changing in size until it meets the + # specified imbalance tolerance, or gives up trying + + # seek out the element reservoir + while nelem_part[adv_part] == 0: + adv_part = adv_part + 1 + if adv_part >= num_ranks: + raise ValueError("Ran out of elements to partition.") + + if debug: + print(f"-{nelem_part[r]=}, adv_part({adv_part})," + f" {nelem_part[adv_part]=}") + print(f"-{part_loc[r+1]=},{part_loc[adv_part+1]=}") + print(f"-{num_elem_needed=},{part_imbalance=}") + + if niter > 100: + raise ValueError("Detected too many iterations in partitioning.") + + # The purpose of the next block is to populate the "moved_elements" + # data structure. Then those elements will be moved between the + # current partition being processed and the "reservoir," + # *and* to adjust the position of the "right" side of the current + # partition boundary. 
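+                # Worked example: with aver_part_nelem = 100 and
+                # nelem_part[r] = 80, num_elem_needed = 20 and
+                # part_imbalance = 0.2; elements are then pulled from the
+                # reservoir (or shed from part(r) when num_elem_needed < 0)
+                # until the imbalance meets the tolerance.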
+ moved_elements = set() + num_elements_added = 0 + + if num_elem_needed > 0: + + # Partition is SMALLER than it should be, grab elements from + # the reservoir + if debug: + print(f"-Grabbing elements from reservoir({adv_part})" + f", {nelem_part[adv_part]=}") + + portion_needed = (float(abs(num_elem_needed)) + / float(nelem_part[adv_part])) + portion_needed = min(portion_needed, 1.0) + + if debug: + print(f"--Chomping {portion_needed*100}% of" + f" reservoir({adv_part}) [by nelem].") + + if portion_needed == 1.0: # Chomp + new_loc = part_loc[adv_part+1] + moved_elements.update(part_to_elements[adv_part]) + + else: # Bite + # This is the spatial size of the reservoir + reserv_interval = part_loc[adv_part+1] - part_loc[r+1] + + # Find what portion of the reservoir to grab spatially + # This part is needed because the elements are not + # distributed uniformly in space. + fine_tuned = False + trial_portion_needed = portion_needed + while not fine_tuned: + pos_update = trial_portion_needed*reserv_interval + new_loc = part_loc[r+1] + pos_update + + moved_elements = set() + num_elem_mv = 0 + for e in part_to_elements[adv_part]: + if elem_centroids[e] <= new_loc: + moved_elements.add(e) + num_elem_mv = num_elem_mv + 1 + if num_elem_mv < num_elem_needed: + fine_tuned = True + else: + ovrsht = (num_elem_mv - num_elem_needed) + rel_ovrsht = ovrsht/float(num_elem_needed) + if rel_ovrsht > 0.8: + # bisect the space grabbed and try again + trial_portion_needed = trial_portion_needed/2.0 + else: + fine_tuned = True + + portion_needed = trial_portion_needed + new_loc = part_loc[r+1] + pos_update + if debug: + print(f"--Tuned: {portion_needed=} [by spatial volume]") + print(f"--Advancing part({r}) by +{pos_update}") + + num_elements_added = len(moved_elements) + if debug: + print(f"--Adding {num_elements_added} to part({r}).") + + else: + + # Partition is LARGER than it should be + # Grab the spatial size of the current partition + # to estimate the portion we need to shave off + # assuming uniform element density + part_interval = part_loc[r+1] - part_loc[r] + num_to_move = -num_elem_needed + portion_needed = num_to_move/float(nelem_part[r]) + + if debug: + print(f"--Shaving off {portion_needed*100}% of" + f" partition({r}) [by nelem].") + + # Tune the shaved portion to account for + # non-uniform element density + fine_tuned = False + while not fine_tuned: + pos_update = portion_needed*part_interval + new_pos = part_loc[r+1] - pos_update + moved_elements = set() + num_elem_mv = 0 + for e in part_to_elements[r]: + if elem_centroids[e] > new_pos: + moved_elements.add(e) + num_elem_mv = num_elem_mv + 1 + if num_elem_mv < num_to_move: + fine_tuned = True + else: + ovrsht = (num_elem_mv - num_to_move) + rel_ovrsht = ovrsht/float(num_to_move) + if rel_ovrsht > 0.8: + # bisect and try again + portion_needed = portion_needed/2.0 + else: + fine_tuned = True + + # new "right" wall location of shranken part + # and negative num_elements_added for removal + new_loc = new_pos + num_elements_added = -len(moved_elements) + if debug: + print(f"--Reducing partition size by {portion_needed*100}%" + " [by nelem].") + print(f"--Removing {-num_elements_added} from part({r}).") + + # Now "moved_elements", "num_elements_added", and "new_loc" + # are computed. Update the partition, and reservoir. 
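+                # Sign convention: num_elements_added > 0 grows part(r) by
+                # pulling moved_elements out of the reservoir part; a negative
+                # value shrinks part(r) by pushing moved_elements into it.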
+ if debug: + print(f"--Number of elements to ADD: {num_elements_added}.") + + if num_elements_added > 0: + part_to_elements[r].update(moved_elements) + part_to_elements[adv_part].difference_update( + moved_elements) + for e in moved_elements: + elem_to_rank[e] = r + else: + part_to_elements[r].difference_update(moved_elements) + part_to_elements[adv_part].update(moved_elements) + for e in moved_elements: + elem_to_rank[e] = adv_part + + total_change = total_change + num_elements_added + part_loc[r+1] = new_loc + if debug: + print(f"--Before: {nelem_part=}") + nelem_part[r] = nelem_part[r] + num_elements_added + nelem_part[adv_part] = nelem_part[adv_part] - num_elements_added + if debug: + print(f"--After: {nelem_part=}") + + # Compute new nelem_needed and part_imbalance + num_elem_needed = num_elem_needed - num_elements_added + part_imbalance = \ + np.abs(num_elem_needed) / float(aver_part_nelem) + niter = niter + 1 + + # Summarize the total change and state of the partition + # and reservoir + if debug: + print(f"-Part({r}): {total_change=}") + print(f"-Part({r=}): {nelem_part[r]=}, {part_imbalance=}") + print(f"-Part({adv_part}): {nelem_part[adv_part]=}") + + # Validate the partitioning before returning + total_partitioned_elements = sum([len(part_to_elements[r]) + for r in range(num_ranks)]) + total_nelem_part = sum([nelem_part[r] for r in range(num_ranks)]) + + if debug: + print("Validating mesh parts.") + + if total_partitioned_elements != total_nelem_part: + raise ValueError("Validator: parted element counts dont match") + if total_partitioned_elements != global_nelements: + raise ValueError("Validator: global element counts dont match.") + if len(elem_to_rank) != global_nelements: + raise ValueError("Validator: elem-to-rank wrong size.") + if np.any(nelem_part) <= 0: + raise ValueError("Validator: empty partitions.") + + for e in range(global_nelements): + part = elem_to_rank[e] + if e not in part_to_elements[part]: + raise ValueError("Validator: part/element/part map mismatch.") + + part_counts = np.zeros(global_nelements) + for part_elements in part_to_elements.values(): + for element in part_elements: + part_counts[element] = part_counts[element] + 1 + + if np.any(part_counts > 1): + raise ValueError("Validator: degenerate elements") + if np.any(part_counts < 1): + raise ValueError("Validator: orphaned elements") + + return elem_to_rank + + +def generate_and_distribute_mesh(comm, generate_mesh, **kwargs): """Generate a mesh and distribute it among all ranks in *comm*. 
Generate the mesh with the user-supplied mesh generation function @@ -489,6 +813,66 @@ def create_parallel_grid(comm, generate_grid): return generate_and_distribute_mesh(comm=comm, generate_mesh=generate_grid) +def boundary_report(dcoll, boundaries, outfile_name, *, dd=DD_VOLUME_ALL, + mesh=None): + """Generate a report of the grid boundaries.""" + boundaries = normalize_boundaries(boundaries) + + comm = dcoll.mpi_communicator + nproc = 1 + rank = 0 + if comm is not None: + nproc = comm.Get_size() + rank = comm.Get_rank() + + if mesh is not None: + nelem = 0 + for grp in mesh.groups: + nelem = nelem + grp.nelements + local_header = f"nproc: {nproc}\nrank: {rank}\nnelem: {nelem}\n" + else: + local_header = f"nproc: {nproc}\nrank: {rank}\n" + + from io import StringIO + local_report = StringIO(local_header) + local_report.seek(0, 2) + + for bdtag in boundaries: + boundary_discr = dcoll.discr_from_dd(bdtag) + nnodes = sum([grp.ndofs for grp in boundary_discr.groups]) + local_report.write(f"{bdtag}: {nnodes}\n") + + from meshmode.mesh import BTAG_PARTITION + from meshmode.distributed import get_connected_parts + connected_part_ids = get_connected_parts(dcoll.discr_from_dd(dd).mesh) + local_report.write(f"num_nbr_parts: {len(connected_part_ids)}\n") + local_report.write(f"connected_part_ids: {connected_part_ids}\n") + part_nodes = [] + for connected_part_id in connected_part_ids: + boundary_discr = dcoll.discr_from_dd( + dd.trace(BTAG_PARTITION(connected_part_id))) + nnodes = sum([grp.ndofs for grp in boundary_discr.groups]) + part_nodes.append(nnodes) + if part_nodes: + local_report.write(f"nnodes_pb: {part_nodes}\n") + + local_report.write("-----\n") + local_report.seek(0) + + for irank in range(nproc): + if irank == rank: + f = open(outfile_name, "a+") + f.write(local_report.read()) + f.close() + if comm is not None: + comm.barrier() + + +def force_evaluation(actx, expn): + """Wrap freeze/thaw forcing evaluation of expressions.""" + return actx.thaw(actx.freeze(expn)) + + def get_reasonable_memory_pool(ctx: cl.Context, queue: cl.CommandQueue, force_buffer: bool = False, force_non_pool: bool = False): @@ -519,7 +903,6 @@ def get_reasonable_memory_pool(ctx: cl.Context, queue: cl.CommandQueue, ctx, alignment=0, queue=queue)) else: from warnings import warn - if not has_coarse_grain_buffer_svm(queue.device): warn(f"No SVM support on {queue.device}, returning a CL buffer-based " "memory pool. If you are running with PoCL-cuda, please update " @@ -537,7 +920,10 @@ def configurate(config_key, config_object=None, default_value=None): d = config_object if isinstance(config_object, dict) else\ config_object.__dict__ if config_key in d: - return d[config_key] + value = d[config_key] + if default_value is not None: + return type(default_value)(value) + return value return default_value From 66d51b271b05f0910a77329bfdf2637ef0eeab8b Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Mon, 10 Apr 2023 13:48:56 -0500 Subject: [PATCH 02/14] Update IO utils for prediction --- mirgecom/io.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/mirgecom/io.py b/mirgecom/io.py index dfbf9d42b..628605361 100644 --- a/mirgecom/io.py +++ b/mirgecom/io.py @@ -3,6 +3,7 @@ .. autofunction:: make_status_message .. autofunction:: make_rank_fname .. autofunction:: make_par_fname +.. 
autofunction:: read_and_distribute_yaml_data """ __copyright__ = """ @@ -38,7 +39,8 @@ def make_init_message(*, dim, order, dt, t_final, nstatus, nviz, cfl, constant_cfl, initname, eosname, casename, - nelements=0, global_nelements=0): + nelements=0, global_nelements=0, + t_initial=0): """Create a summary of some general simulation parameters and inputs.""" return ( f"Initialization for Case({casename})\n" @@ -46,6 +48,7 @@ def make_init_message(*, dim, order, dt, t_final, f"Num {dim}d order-{order} elements: {nelements}\n" f"Num global elements: {global_nelements}\n" f"Timestep: {dt}\n" + f"Initial time: {t_initial}\n" f"Final time: {t_final}\n" f"CFL: {cfl}\n" f"Constant CFL: {constant_cfl}\n" @@ -77,3 +80,26 @@ def make_rank_fname(basename, rank=0, step=0, t=0): def make_par_fname(basename, step=0, t=0): r"""Make parallel visualization filename.""" return f"{basename}-{step:09d}.pvtu" + + +def read_and_distribute_yaml_data(mpi_comm=None, file_path=None): + """Read a YAML file on one rank, broadcast result to world.""" + import yaml + + input_data = None + if file_path is None: + return input_data + + rank = 0 + + if mpi_comm is not None: + rank = mpi_comm.Get_rank() + + if rank == 0: + with open(file_path) as f: + input_data = yaml.load(f, Loader=yaml.FullLoader) + + if mpi_comm is not None: + input_data = mpi_comm.bcast(input_data, root=0) + + return input_data From 680226ac02381f8abb5011c70e490ecbab38a2bc Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Fri, 14 Apr 2023 11:40:02 -0500 Subject: [PATCH 03/14] Use productions version of requirements and ci --- .github/workflows/ci.yaml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ec6c17d7c..c7b414d33 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -43,7 +43,7 @@ jobs: run: | MINIFORGE_INSTALL_DIR=.miniforge3 . "$MINIFORGE_INSTALL_DIR/bin/activate" testing - python -m pip install types-psutil + python -m pip install types-psutil types-PyYAML ./run-mypy.sh pylint: diff --git a/requirements.txt b/requirements.txt index 2c4213576..72e2ce930 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,6 @@ mpi4py numpy pytest pytest-cov -pyvisfile pymetis importlib-resources psutil @@ -24,3 +23,4 @@ git+https://github.com/pythological/kanren.git#egg=miniKanren --editable git+https://github.com/pyrometheus/pyrometheus.git@tulio-transport#egg=pyrometheus --editable git+https://github.com/illinois-ceesd/logpyle.git#egg=logpyle --editable git+https://github.com/kaushikcfd/feinsum.git#egg=feinsum +--editable git+https://github.com/inducer/pyvisfile.git#egg=pyvisfile From 60a981fe5a127ba8b736a96a80e7bb88348d2aea Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Wed, 19 Apr 2023 17:18:46 -0500 Subject: [PATCH 04/14] Updates from @majosm review comments. 
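
For reviewers, a minimal usage sketch of the updated partitioner
interface (the mesh construction and rank counts below are illustrative
assumptions, not part of this patch):

    import numpy as np
    from meshmode.mesh.generation import generate_regular_rect_mesh
    from mirgecom.simutil import geometric_mesh_partitioner

    # serial mesh to be decomposed along the X axis
    mesh = generate_regular_rect_mesh(
        a=(0,)*2, b=(1,)*2, nelements_per_axis=(16, 16))

    # 4 partitions along X, 1 along Y, with load balancing enabled
    elem_to_rank = geometric_mesh_partitioner(
        mesh, nranks_per_axis=np.array([4, 1]), auto_balance=True)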
--- mirgecom/simutil.py | 58 +++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 553e525a6..3cc079a96 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -445,7 +445,11 @@ def max_component_norm(dcoll, fields, order=np.inf, *, dd=DD_VOLUME_ALL): componentwise_norms(dcoll, fields, order, dd=dd), actx))) -def geometric_mesh_partitioner(mesh, num_ranks=1, *, nranks_per_axis=None, +class PartitioningError(Exception): + pass + + +def geometric_mesh_partitioner(mesh, *, num_ranks=None, nranks_per_axis=None, auto_balance=False, imbalance_tolerance=.01, debug=False): """Partition a mesh uniformly along the X coordinate axis. @@ -479,18 +483,19 @@ def geometric_mesh_partitioner(mesh, num_ranks=1, *, nranks_per_axis=None, Array indicating the MPI rank for each element """ mesh_dimension = mesh.dim - if nranks_per_axis is None: + if nranks_per_axis is None or num_ranks is not None: + from warnings import warn + warn("num_ranks is deprecated, use nranks_per_axis instead.") + num_ranks = num_ranks or 1 nranks_per_axis = np.ones(mesh_dimension) nranks_per_axis[0] = num_ranks if len(nranks_per_axis) != mesh_dimension: - raise ValueError("nranks_per_axis must match mesh dimension.") - nranks_test = 1 - for i in range(mesh_dimension): - nranks_test = nranks_test * nranks_per_axis[i] - if nranks_test != num_ranks: - raise ValueError("nranks_per_axis must match num_ranks.") - - mesh_groups, = mesh.groups + raise PartitioningError("nranks_per_axis must match mesh dimension.") + num_ranks = np.prod(nranks_per_axis) + if np.prod(nranks_per_axis[1:]) != 1: + raise PartitioningError("geometric_mesh_partitioner currently only supports" + " partitioning in the X-dimension." 
+ "(only nranks_per_axis[0] should be > 1).") mesh_verts = mesh.vertices mesh_x = mesh_verts[0] @@ -500,9 +505,15 @@ def geometric_mesh_partitioner(mesh, num_ranks=1, *, nranks_per_axis=None, part_loc = np.linspace(x_min, x_max, num_ranks+1) part_interval = x_interval / nranks_per_axis[0] - elem_x = mesh_verts[0, mesh_groups.vertex_indices] - elem_centroids = np.sum(elem_x, axis=1)/elem_x.shape[1] - global_nelements = len(elem_centroids) + + global_nelements = 0 + elem_centroids = np.array([]) + for group in mesh.groups: + elem_group_x = mesh_verts[0, group.vertex_indices] + elem_group_centroids = np.sum(elem_group_x, axis=1)/elem_group_x.shape[1] + global_nelements = global_nelements + len(elem_group_centroids) + np.concatenate((elem_centroids, elem_group_centroids)) + aver_part_nelem = global_nelements / num_ranks if debug: @@ -515,7 +526,7 @@ def geometric_mesh_partitioner(mesh, num_ranks=1, *, nranks_per_axis=None, elem_to_rank = ((elem_centroids-x_min) / part_interval).astype(int) # map partition id to list of elements in that partition - part_to_elements = {r: set((np.where(elem_to_rank == r))[0].flat) + part_to_elements = {r: set(np.where(elem_to_rank == r)[0]) for r in range(num_ranks)} # make an array of the geometrically even partition sizes # avoids calling "len" over and over on the element sets @@ -556,7 +567,7 @@ def geometric_mesh_partitioner(mesh, num_ranks=1, *, nranks_per_axis=None, while nelem_part[adv_part] == 0: adv_part = adv_part + 1 if adv_part >= num_ranks: - raise ValueError("Ran out of elements to partition.") + raise PartitioningError("Ran out of elements to partition.") if debug: print(f"-{nelem_part[r]=}, adv_part({adv_part})," @@ -565,7 +576,8 @@ def geometric_mesh_partitioner(mesh, num_ranks=1, *, nranks_per_axis=None, print(f"-{num_elem_needed=},{part_imbalance=}") if niter > 100: - raise ValueError("Detected too many iterations in partitioning.") + raise PartitioningError("Detected too many iterations in" + " partitioning.") # The purpose of the next block is to populate the "moved_elements" # data structure. 
Then those elements will be moved between the @@ -729,18 +741,18 @@ def geometric_mesh_partitioner(mesh, num_ranks=1, *, nranks_per_axis=None, print("Validating mesh parts.") if total_partitioned_elements != total_nelem_part: - raise ValueError("Validator: parted element counts dont match") + raise PartitioningError("Validator: parted element counts dont match") if total_partitioned_elements != global_nelements: - raise ValueError("Validator: global element counts dont match.") + raise PartitioningError("Validator: global element counts dont match.") if len(elem_to_rank) != global_nelements: - raise ValueError("Validator: elem-to-rank wrong size.") + raise PartitioningError("Validator: elem-to-rank wrong size.") if np.any(nelem_part) <= 0: - raise ValueError("Validator: empty partitions.") + raise PartitioningError("Validator: empty partitions.") for e in range(global_nelements): part = elem_to_rank[e] if e not in part_to_elements[part]: - raise ValueError("Validator: part/element/part map mismatch.") + raise PartitioningError("Validator: part/element/part map mismatch.") part_counts = np.zeros(global_nelements) for part_elements in part_to_elements.values(): @@ -748,9 +760,9 @@ def geometric_mesh_partitioner(mesh, num_ranks=1, *, nranks_per_axis=None, part_counts[element] = part_counts[element] + 1 if np.any(part_counts > 1): - raise ValueError("Validator: degenerate elements") + raise PartitioningError("Validator: degenerate elements") if np.any(part_counts < 1): - raise ValueError("Validator: orphaned elements") + raise PartitioningError("Validator: orphaned elements") return elem_to_rank From aa83c48358f71a1b851ab762d940d571a8fdc779 Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Wed, 19 Apr 2023 17:33:13 -0500 Subject: [PATCH 05/14] Fix up issues --- mirgecom/simutil.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 3cc079a96..fedc0a472 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -446,10 +446,11 @@ def max_component_norm(dcoll, fields, order=np.inf, *, dd=DD_VOLUME_ALL): class PartitioningError(Exception): + """Error tossed to indicate an error with domain decomposition.""" pass -def geometric_mesh_partitioner(mesh, *, num_ranks=None, nranks_per_axis=None, +def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, auto_balance=False, imbalance_tolerance=.01, debug=False): """Partition a mesh uniformly along the X coordinate axis. @@ -463,9 +464,9 @@ def geometric_mesh_partitioner(mesh, *, num_ranks=None, nranks_per_axis=None, mesh: :class:`meshmode.mesh.Mesh` The serial mesh to partition num_ranks: int - The number of partitions to make + The number of partitions to make (deprecated) nranks_per_axis: numpy.ndarray - How many partitions per specified axis. Currently unused. + How many partitions per specified axis. auto_balance: bool Indicates whether to perform automatic balancing. 
If true, the partitioner will try to balance the number of elements over From 103ae22a8b163615eef7427c428ba8540906429a Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Wed, 19 Apr 2023 17:41:47 -0500 Subject: [PATCH 06/14] Fixish --- mirgecom/simutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index fedc0a472..4f078a14c 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -488,7 +488,7 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, from warnings import warn warn("num_ranks is deprecated, use nranks_per_axis instead.") num_ranks = num_ranks or 1 - nranks_per_axis = np.ones(mesh_dimension) + nranks_per_axis = np.ones(mesh_dimension, dtype=np.int8) nranks_per_axis[0] = num_ranks if len(nranks_per_axis) != mesh_dimension: raise PartitioningError("nranks_per_axis must match mesh dimension.") From 8e66c6f8e4b9d0938195ebe1e19811ef35334856 Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Wed, 19 Apr 2023 17:43:30 -0500 Subject: [PATCH 07/14] Fixish --- mirgecom/simutil.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 4f078a14c..72ea2acc1 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -447,6 +447,7 @@ def max_component_norm(dcoll, fields, order=np.inf, *, dd=DD_VOLUME_ALL): class PartitioningError(Exception): """Error tossed to indicate an error with domain decomposition.""" + pass From 4002b25bb1d2fc376effae3cacdef173e2e89048 Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Wed, 19 Apr 2023 17:51:54 -0500 Subject: [PATCH 08/14] Fixish --- mirgecom/simutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 72ea2acc1..589e7dee9 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -528,7 +528,7 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, elem_to_rank = ((elem_centroids-x_min) / part_interval).astype(int) # map partition id to list of elements in that partition - part_to_elements = {r: set(np.where(elem_to_rank == r)[0]) + part_to_elements = {r: set(np.where(elem_to_rank == r)[0].flat) for r in range(num_ranks)} # make an array of the geometrically even partition sizes # avoids calling "len" over and over on the element sets From 4b93ed2e708578ac2a67aa4bb7907a5ba7c79107 Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Wed, 19 Apr 2023 17:59:12 -0500 Subject: [PATCH 09/14] fixish --- mirgecom/simutil.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 589e7dee9..ed52e0da3 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -484,6 +484,7 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, elem_to_rank: numpy.ndarray Array indicating the MPI rank for each element """ + debug = True mesh_dimension = mesh.dim if nranks_per_axis is None or num_ranks is not None: from warnings import warn @@ -528,7 +529,7 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, elem_to_rank = ((elem_centroids-x_min) / part_interval).astype(int) # map partition id to list of elements in that partition - part_to_elements = {r: set(np.where(elem_to_rank == r)[0].flat) + part_to_elements = {r: set(np.where(elem_to_rank == r)[0]) for r in range(num_ranks)} # make an array of the geometrically even partition sizes # avoids calling "len" over and over on the element sets From 
efec82bea23ddd770529b3a0924a0f20abf3f963 Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Wed, 19 Apr 2023 18:07:39 -0500 Subject: [PATCH 10/14] fixish --- mirgecom/simutil.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index ed52e0da3..60634fcc5 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -528,8 +528,10 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, # Create geometrically even partitions elem_to_rank = ((elem_centroids-x_min) / part_interval).astype(int) + print(f"{elem_to_rank=}") + # map partition id to list of elements in that partition - part_to_elements = {r: set(np.where(elem_to_rank == r)[0]) + part_to_elements = {r: set((np.where(elem_to_rank == r))[0].flat) for r in range(num_ranks)} # make an array of the geometrically even partition sizes # avoids calling "len" over and over on the element sets From 9a89774bf07ea7d62106586b41c8310bd8e1393d Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Wed, 19 Apr 2023 18:13:38 -0500 Subject: [PATCH 11/14] fixish --- mirgecom/simutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 60634fcc5..8f829683d 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -515,7 +515,7 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, elem_group_x = mesh_verts[0, group.vertex_indices] elem_group_centroids = np.sum(elem_group_x, axis=1)/elem_group_x.shape[1] global_nelements = global_nelements + len(elem_group_centroids) - np.concatenate((elem_centroids, elem_group_centroids)) + elem_centroids = np.concatenate((elem_centroids, elem_group_centroids)) aver_part_nelem = global_nelements / num_ranks From 6f6711704d3459ce0c9b1c085ea75ec9ec0ba0a0 Mon Sep 17 00:00:00 2001 From: Mike Campbell Date: Thu, 20 Apr 2023 11:03:56 -0500 Subject: [PATCH 12/14] Apply suggestions from code review Co-authored-by: Matt Smith --- mirgecom/simutil.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 8f829683d..0826ffc17 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -484,19 +484,18 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, elem_to_rank: numpy.ndarray Array indicating the MPI rank for each element """ - debug = True mesh_dimension = mesh.dim if nranks_per_axis is None or num_ranks is not None: from warnings import warn warn("num_ranks is deprecated, use nranks_per_axis instead.") num_ranks = num_ranks or 1 - nranks_per_axis = np.ones(mesh_dimension, dtype=np.int8) + nranks_per_axis = np.ones(mesh_dimension, dtype=np.int32) nranks_per_axis[0] = num_ranks if len(nranks_per_axis) != mesh_dimension: - raise PartitioningError("nranks_per_axis must match mesh dimension.") + raise ValueError("nranks_per_axis must match mesh dimension.") num_ranks = np.prod(nranks_per_axis) if np.prod(nranks_per_axis[1:]) != 1: - raise PartitioningError("geometric_mesh_partitioner currently only supports" + raise NotImplementedError("geometric_mesh_partitioner currently only supports" " partitioning in the X-dimension." 
"(only nranks_per_axis[0] should be > 1).") mesh_verts = mesh.vertices @@ -509,13 +508,13 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, part_interval = x_interval / nranks_per_axis[0] - global_nelements = 0 - elem_centroids = np.array([]) + all_elem_group_centroids = [] for group in mesh.groups: elem_group_x = mesh_verts[0, group.vertex_indices] elem_group_centroids = np.sum(elem_group_x, axis=1)/elem_group_x.shape[1] - global_nelements = global_nelements + len(elem_group_centroids) - elem_centroids = np.concatenate((elem_centroids, elem_group_centroids)) + all_elem_group_centroids.append(elem_group_centroids) + elem_centroids = np.concatenate(all_elem_group_centroids) + global_nelements = len(elem_centroids) aver_part_nelem = global_nelements / num_ranks From 541916171a1063771c4fc384da954d9cc38019bc Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Thu, 20 Apr 2023 11:07:03 -0500 Subject: [PATCH 13/14] Fixishs --- mirgecom/simutil.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 0826ffc17..032f521ac 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -495,8 +495,8 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, raise ValueError("nranks_per_axis must match mesh dimension.") num_ranks = np.prod(nranks_per_axis) if np.prod(nranks_per_axis[1:]) != 1: - raise NotImplementedError("geometric_mesh_partitioner currently only supports" - " partitioning in the X-dimension." + raise NotImplementedError("geometric_mesh_partitioner currently only " + "supports partitioning in the X-dimension." "(only nranks_per_axis[0] should be > 1).") mesh_verts = mesh.vertices mesh_x = mesh_verts[0] @@ -530,7 +530,7 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, print(f"{elem_to_rank=}") # map partition id to list of elements in that partition - part_to_elements = {r: set((np.where(elem_to_rank == r))[0].flat) + part_to_elements = {r: set(np.where(elem_to_rank == r)[0]) for r in range(num_ranks)} # make an array of the geometrically even partition sizes # avoids calling "len" over and over on the element sets From 4b617d2149a012e4119753c3c53df1054ee2821d Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Thu, 20 Apr 2023 11:13:13 -0500 Subject: [PATCH 14/14] Unredundatize adv_part tracking. 
--- mirgecom/simutil.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mirgecom/simutil.py b/mirgecom/simutil.py index 032f521ac..dc878cde7 100644 --- a/mirgecom/simutil.py +++ b/mirgecom/simutil.py @@ -545,16 +545,16 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, for r in range(num_ranks-1): # find the element reservoir (next part with elements in it) - adv_part = r + 1 - while nelem_part[adv_part] == 0: - adv_part = adv_part + 1 + # adv_part = r + 1 + # while nelem_part[adv_part] == 0: + # adv_part = adv_part + 1 num_elem_needed = aver_part_nelem - nelem_part[r] part_imbalance = np.abs(num_elem_needed) / float(aver_part_nelem) if debug: print(f"Processing part({r=})") - print(f"{part_loc[r]=}, {adv_part=}") + print(f"{part_loc[r]=}") print(f"{num_elem_needed=}, {part_imbalance=}") print(f"{nelem_part=}") @@ -562,8 +562,10 @@ def geometric_mesh_partitioner(mesh, num_ranks=None, *, nranks_per_axis=None, total_change = 0 moved_elements = set() - while ((part_imbalance > imbalance_tolerance) - and (adv_part < num_ranks)): + adv_part = r + 1 + # while ((part_imbalance > imbalance_tolerance) + # and (adv_part < num_ranks)): + while part_imbalance > imbalance_tolerance: # This partition needs to keep changing in size until it meets the # specified imbalance tolerance, or gives up trying
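
For reference, a combined usage sketch of the run-configuration helpers
added earlier in this series (the file name and keys below are
illustrative assumptions):

    from mpi4py import MPI
    from mirgecom.io import read_and_distribute_yaml_data
    from mirgecom.simutil import configurate

    comm = MPI.COMM_WORLD
    # rank 0 reads the YAML file; the result is broadcast to all ranks
    input_data = read_and_distribute_yaml_data(
        mpi_comm=comm, file_path="run_params.yaml")

    # configurate coerces the configured value to the type of the
    # default, so a YAML entry like "cfl: 0.5" comes back as a float
    cfl = configurate("cfl", input_data, default_value=0.1)
    nviz = configurate("nviz", input_data, default_value=100)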