From c5c961baab783f9ae2c68bc155d323dcd23029d5 Mon Sep 17 00:00:00 2001
From: Tulio <tulioricci@gmail.com>
Date: Mon, 10 Apr 2023 13:10:46 -0500
Subject: [PATCH 1/5] Initial commit

---
 examples/burgers-mpi.py     |  59 +++++----
 examples/linear_wave-mpi.py | 250 ++++++++++++++++++++++++++++++++++++
 2 files changed, 283 insertions(+), 26 deletions(-)
 create mode 100644 examples/linear_wave-mpi.py

diff --git a/examples/burgers-mpi.py b/examples/burgers-mpi.py
index a634120f1..da549637a 100644
--- a/examples/burgers-mpi.py
+++ b/examples/burgers-mpi.py
@@ -53,26 +53,30 @@
                                          logmgr_add_device_memory_usage,
                                          logmgr_add_mempool_usage,)
 
+import sys
 
 def utx(actx, nodes, t=0):
     """Initialize the field for Burgers Eqn."""
+    x = nodes[0]
+    return actx.np.sin(2.0*np.pi*nodes[0]/2.0) 
 
-    x = nodes[0] - 3.0*t
-    ones = 0*x + 1.0
-    return actx.np.where(x < -0.5, ones, 2*ones) 
-
+#    """Create a bump."""
+#    return actx.np.exp(-(nodes[0]-1.0)**2/0.1**2)
 
 def _facial_flux(dcoll, u_tpair):
 
     actx = u_tpair.int.array_context
     normal = actx.thaw(dcoll.normal(u_tpair.dd))
     u2_pair = TracePair(dd=u_tpair.dd,
-                        interior=u_tpair.int**2,
-                        exterior=u_tpair.ext**2)
-    flux_weak = .5*u2_pair.avg*normal[0]
+                        interior=0.5*u_tpair.int**2,
+                        exterior=0.5*u_tpair.ext**2)
+
+    # average
+    flux_weak = 0.5*(u2_pair.ext + u2_pair.int)*normal[0]
 
-    # dissipation term
-    flux_weak += .25*u_tpair.diff*normal[0]
+#    # dissipation term
+#    lamb = actx.np.abs( actx.np.maximum( u_tpair.int, u_tpair.ext ) )
+#    flux_weak = flux_weak + .5*lamb*u_tpair.diff*normal[0]
 
     return op.project(dcoll, u_tpair.dd, "all_faces", flux_weak)
 
@@ -104,13 +108,13 @@ def burgers_operator(dcoll, u, u_bc, *, comm_tag=None):
     u_bnd = op.project(dcoll, "vol", BTAG_ALL, u)
     itp = interior_trace_pairs(dcoll, u, comm_tag=(_BurgTag, comm_tag))
     
-    el_bnd_flux = (_facial_flux(dcoll,
-                                u_tpair=TracePair(BTAG_ALL, interior=u_bnd,
-                                                  exterior=u_bc))
-                   + sum([_facial_flux(dcoll, u_tpair=tpair)
-                          for tpair in itp]))
-    vol_flux = 0.5*u*u
-    return -op.inverse_mass(dcoll, vol_flux -
+    #FIXME maybe flip the sign of u_bc to enforce 0 at the boundary
+    el_bnd_flux = (
+        _facial_flux(dcoll, u_tpair=TracePair(BTAG_ALL,
+                                              interior=u_bnd, exterior=u_bc))
+        + sum([_facial_flux(dcoll, u_tpair=tpair) for tpair in itp]))
+    vol_flux = -op.weak_local_grad(dcoll, 0.5*u**2)[0]
+    return -op.inverse_mass(dcoll, vol_flux +
                             op.face_mass(dcoll, el_bnd_flux))
 
 
@@ -127,7 +131,7 @@ def main(actx_class, snapshot_pattern="burgers-mpi-{step:04d}-{rank:04d}.pkl",
     num_parts = comm.Get_size()
 
     logmgr = initialize_logmgr(use_logmgr,
-        filename="burgers-mpi.sqlite", mode="wu", mpi_comm=comm)
+        filename="burgers-mpi.sqlite", mode="wo", mpi_comm=comm)
     if use_profiling:
         queue = cl.CommandQueue(cl_ctx,
             properties=cl.command_queue_properties.PROFILING_ENABLE)
@@ -148,12 +152,12 @@ def main(actx_class, snapshot_pattern="burgers-mpi-{step:04d}-{rank:04d}.pkl",
         mesh_dist = MPIMeshDistributor(comm)
 
         dim = 1
-        nel_1d = 20
+        nel_1d = 200
 
         if mesh_dist.is_mananger_rank():
             from meshmode.mesh.generation import generate_regular_rect_mesh
             mesh = generate_regular_rect_mesh(
-                a=(-1.0,)*dim, b=(1.0,)*dim,
+                a=(0.0,)*dim, b=(2.0,)*dim,
                 nelements_per_axis=(nel_1d,)*dim)
 
             print("%d elements" % mesh.nelements)
@@ -175,18 +179,18 @@ def main(actx_class, snapshot_pattern="burgers-mpi-{step:04d}-{rank:04d}.pkl",
         nel_1d = restart_data["nel_1d"]
         assert comm.Get_size() == restart_data["num_parts"]
 
-    order = 8
+    order = 2
     dcoll = create_discretization_collection(actx, local_mesh, order=order)
     nodes = actx.thaw(dcoll.nodes())
 
     current_cfl = 0.485
-    wave_speed = 300.0
+    wave_speed = 1.0
     from grudge.dt_utils import characteristic_lengthscales
     nodal_dt = characteristic_lengthscales(actx, dcoll) / wave_speed
 
     dt = actx.to_numpy(current_cfl * op.nodal_min(dcoll, "vol", nodal_dt))[()]
 
-    t_final = 1
+    t_final = 1.0
 
     if restart_step is None:
         t = 0
@@ -244,8 +248,8 @@ def rhs(t, u):
     u = force_evaluation(actx, u)
     compiled_rhs = actx.compile(rhs)
 
-    restart_n = 100
-    viz_n = 1
+    restart_n = 1000
+    viz_n = 10
 
     while t < t_final:
         if logmgr:
@@ -273,15 +277,18 @@ def rhs(t, u):
             print(istep, t, actx.to_numpy(op.norm(dcoll, u, np.inf)))
             vis.write_parallel_vtk_file(
                 comm,
-                "burgers-mpi-%03d-%04d.vtu" % (rank, istep),
+                "burgers-mpi-%04d-%04d.vtu" % (rank, istep),
                 [
-                    ("u", u)
+                    ("u", u),
+                    ("x", nodes[0])
                 ], overwrite=True
             )
 
         d = rk4_step(u, t, dt, compiled_rhs)
         u  = force_evaluation(actx, d)
 
+#        break
+
         t += dt
         istep += 1
 
diff --git a/examples/linear_wave-mpi.py b/examples/linear_wave-mpi.py
new file mode 100644
index 000000000..288bc7427
--- /dev/null
+++ b/examples/linear_wave-mpi.py
@@ -0,0 +1,250 @@
+"""Demonstrate linear wave equation example."""
+
+__copyright__ = "Copyright (C) 2020 University of Illinos Board of Trustees"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import numpy as np
+import numpy.linalg as la  # noqa
+import pyopencl as cl
+import pyopencl.array as cla  # noqa
+
+from grudge.shortcuts import make_visualizer
+import grudge.op as op
+
+from mirgecom.discretization import create_discretization_collection
+from mirgecom.integrators import rk4_step
+from mirgecom.utils import force_evaluation
+
+from mirgecom.profiling import PyOpenCLProfilingArrayContext
+
+from logpyle import IntervalTimer, set_dt
+
+from mirgecom.logging_quantities import (initialize_logmgr,
+                                         logmgr_add_cl_device_info,
+                                         logmgr_add_device_memory_usage,
+                                         logmgr_add_mempool_usage)
+
+from meshmode.mesh import BTAG_ALL, BTAG_NONE  # noqa
+from grudge.trace_pair import TracePair, interior_trace_pairs
+
+
+def bump(actx, nodes):
+    """Create a bump."""
+    return actx.np.exp(-(nodes[0] - 0.0)**2/0.1**2)
+
+
+
+def _flux(dcoll, c, w_tpair):
+    u = w_tpair
+
+    actx = w_tpair.int.array_context
+    normal = actx.thaw(dcoll.normal(w_tpair.dd))
+
+    flux_weak = normal[0]*u.avg
+
+    # upwind
+    flux_weak = flux_weak + 0.5*(u.ext-u.int)*normal[0]
+
+    return op.project(dcoll, w_tpair.dd, "all_faces", c*flux_weak)
+
+
+class _WaveTag:
+    pass
+
+
+def linear_wave_operator(dcoll, c, w, *, comm_tag=None):
+    """Compute the RHS of the linear wave equation.
+
+    Parameters
+    ----------
+    dcoll: grudge.discretization.DiscretizationCollection
+        the discretization collection to use
+    c: float
+        the (constant) wave speed
+    w: numpy.ndarray
+        an object array of DOF arrays, representing the state vector
+    comm_tag: Hashable
+        Tag for distributed communication
+
+    Returns
+    -------
+    numpy.ndarray
+        an object array of DOF arrays, representing the ODE RHS
+    """
+    u = w
+
+    dir_u = op.project(dcoll, "vol", BTAG_ALL, u)
+    dir_bval = dir_u
+    dir_bc = -dir_u*0.0
+
+    return -(
+        op.inverse_mass(dcoll, -c*op.weak_local_grad(dcoll, u)[0]
+        +  # noqa: W504
+        op.face_mass(dcoll,
+            _flux(dcoll, c=c,
+                  w_tpair=TracePair(BTAG_ALL, interior=dir_bval,
+                                    exterior=dir_bc))
+            + sum(
+                _flux(dcoll, c=c, w_tpair=tpair)
+                for tpair in interior_trace_pairs(
+                    dcoll, w, comm_tag=(_WaveTag, comm_tag)))
+            )
+        )
+    )
+
+
+
+
+
+def main(actx_class, use_profiling=False, use_logmgr=False, lazy: bool = False):
+    """Drive the example."""
+    cl_ctx = cl.create_some_context()
+
+    logmgr = initialize_logmgr(use_logmgr,
+        filename="wave.sqlite", mode="wo")
+
+    from mirgecom.simutil import get_reasonable_memory_pool
+
+    if use_profiling:
+        if lazy:
+            raise RuntimeError("Cannot run lazy with profiling.")
+        queue = cl.CommandQueue(cl_ctx,
+            properties=cl.command_queue_properties.PROFILING_ENABLE)
+
+        alloc = get_reasonable_memory_pool(cl_ctx, queue)
+        actx = PyOpenCLProfilingArrayContext(queue, allocator=alloc)
+    else:
+        queue = cl.CommandQueue(cl_ctx)
+        alloc = get_reasonable_memory_pool(cl_ctx, queue)
+
+        if lazy:
+            actx = actx_class(queue, allocator=alloc)
+        else:
+            actx = actx_class(queue, allocator=alloc,
+                                        force_device_scalars=True)
+
+    dim = 1
+    nel_1d = 100
+    from meshmode.mesh.generation import generate_regular_rect_mesh
+
+    mesh = generate_regular_rect_mesh(
+        a=(-2,)*dim,
+        b=(+2,)*dim,
+        nelements_per_axis=(nel_1d,)*dim)
+
+    order = 3
+
+    dcoll = create_discretization_collection(actx, mesh, order=order)
+    nodes = actx.thaw(dcoll.nodes())
+
+    current_cfl = 0.2
+    wave_speed = 1.0
+    from grudge.dt_utils import characteristic_lengthscales
+    nodal_dt = characteristic_lengthscales(actx, dcoll) / wave_speed
+    dt = actx.to_numpy(current_cfl * op.nodal_min(dcoll, "vol", nodal_dt))[()]
+
+    print("%d elements" % mesh.nelements)
+
+    fields = bump(actx, nodes)
+
+    if logmgr:
+        logmgr_add_cl_device_info(logmgr, queue)
+        logmgr_add_device_memory_usage(logmgr, queue)
+        logmgr_add_mempool_usage(logmgr, alloc)
+
+        logmgr.add_watches(["step.max", "t_step.max", "t_log.max"])
+
+        try:
+            logmgr.add_watches(["memory_usage_python.max", "memory_usage_gpu.max"])
+        except KeyError:
+            pass
+
+        if use_profiling:
+            logmgr.add_watches(["multiply_time.max"])
+
+        vis_timer = IntervalTimer("t_vis", "Time spent visualizing")
+        logmgr.add_quantity(vis_timer)
+
+    vis = make_visualizer(dcoll)
+
+    def rhs(t, w):
+        return linear_wave_operator(dcoll, c=wave_speed, w=w)
+
+    compiled_rhs = actx.compile(rhs)
+    fields = force_evaluation(actx, fields)
+
+    t = 0
+    t_final = 1
+    istep = 0
+    while t < t_final:
+        if logmgr:
+            logmgr.tick_before()
+
+        if istep % 10 == 0:
+            print(istep, t, actx.to_numpy(op.norm(dcoll, fields, np.inf)))
+            if use_profiling:
+                print(actx.tabulate_profiling_data())
+            vis.write_vtk_file("linear-wave-%04d.vtu" % istep,
+                    [
+                        ("u", fields),
+                        ("x", nodes[0]),
+                        ], overwrite=True)
+
+        fields = rk4_step(fields, t, dt, compiled_rhs)
+        fields = force_evaluation(actx, fields)
+
+        t += dt
+        istep += 1
+
+        if logmgr:
+            set_dt(logmgr, dt)
+            logmgr.tick_after()
+
+    vis.write_vtk_file("linear-wave-%04d.vtu" % istep,
+            [
+                ("u", fields),
+                ("x", nodes[0]),
+                ], overwrite=True)
+
+    if logmgr:
+        logmgr.close()
+
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Wave (non-MPI version)")
+    parser.add_argument("--profiling", action="store_true",
+        help="enable kernel profiling")
+    parser.add_argument("--log", action="store_true",
+        help="enable logging")
+    parser.add_argument("--lazy", action="store_true",
+        help="enable lazy evaluation")
+    args = parser.parse_args()
+
+    from grudge.array_context import get_reasonable_array_context_class
+    actx_class = get_reasonable_array_context_class(lazy=args.lazy,
+                                                    distributed=False)
+
+    main(actx_class, use_profiling=args.profiling,
+         use_logmgr=args.log, lazy=args.lazy)
+
+# vim: foldmethod=marker

From b7a213d902b1304b703b7adf9ef439aa57e65761 Mon Sep 17 00:00:00 2001
From: Tulio <tulioricci@gmail.com>
Date: Mon, 10 Apr 2023 13:23:08 -0500
Subject: [PATCH 2/5] Flake8

---
 examples/burgers-mpi.py     | 41 ++++++++++---------------------------
 examples/linear_wave-mpi.py |  4 ----
 2 files changed, 11 insertions(+), 34 deletions(-)

diff --git a/examples/burgers-mpi.py b/examples/burgers-mpi.py
index da549637a..077f4ec15 100644
--- a/examples/burgers-mpi.py
+++ b/examples/burgers-mpi.py
@@ -24,18 +24,10 @@
 import logging
 
 import numpy as np
-import numpy.linalg as la  # noqa
 import pyopencl as cl
 from grudge.trace_pair import TracePair, interior_trace_pairs
-from pytools.obj_array import flat_obj_array
 
-from meshmode.mesh import BTAG_ALL, BTAG_NONE  # noqa
-from meshmode.discretization.connection import FACE_RESTR_ALL
-from grudge.dof_desc import (
-    DD_VOLUME_ALL,
-    VolumeDomainTag,
-    DISCR_TAG_BASE,
-)
+from meshmode.mesh import BTAG_ALL
 
 from grudge.shortcuts import make_visualizer
 import grudge.op as op
@@ -44,7 +36,6 @@
 from mirgecom.mpi import mpi_entry_point
 from mirgecom.integrators import rk4_step
 from mirgecom.utils import force_evaluation
-from mirgecom.operators import div_operator
 
 from logpyle import IntervalTimer, set_dt
 
@@ -53,15 +44,17 @@
                                          logmgr_add_device_memory_usage,
                                          logmgr_add_mempool_usage,)
 
-import sys
 
 def utx(actx, nodes, t=0):
     """Initialize the field for Burgers Eqn."""
     x = nodes[0]
-    return actx.np.sin(2.0*np.pi*nodes[0]/2.0) 
+    return actx.np.sin(2.0*np.pi*nodes[0]/2.0)
+
+#    x = nodes[0] - 1.0*t
+#    ones = 0*x + 1.0
+#    zeros = ones*0.0
+#    return actx.np.where(actx.np.less(x, 1.0+1e-7), ones, zeros)
 
-#    """Create a bump."""
-#    return actx.np.exp(-(nodes[0]-1.0)**2/0.1**2)
 
 def _facial_flux(dcoll, u_tpair):
 
@@ -71,12 +64,7 @@ def _facial_flux(dcoll, u_tpair):
                         interior=0.5*u_tpair.int**2,
                         exterior=0.5*u_tpair.ext**2)
 
-    # average
-    flux_weak = 0.5*(u2_pair.ext + u2_pair.int)*normal[0]
-
-#    # dissipation term
-#    lamb = actx.np.abs( actx.np.maximum( u_tpair.int, u_tpair.ext ) )
-#    flux_weak = flux_weak + .5*lamb*u_tpair.diff*normal[0]
+    flux_weak = u2_pair.avg*normal[0]
 
     return op.project(dcoll, u_tpair.dd, "all_faces", flux_weak)
 
@@ -102,20 +90,15 @@ def burgers_operator(dcoll, u, u_bc, *, comm_tag=None):
     numpy.ndarray
         an object array of DOF arrays, representing the ODE RHS
     """
-    dd_vol = DD_VOLUME_ALL
-    dd_allfaces = dd_vol.trace(FACE_RESTR_ALL)
-
     u_bnd = op.project(dcoll, "vol", BTAG_ALL, u)
     itp = interior_trace_pairs(dcoll, u, comm_tag=(_BurgTag, comm_tag))
-    
-    #FIXME maybe flip the sign of u_bc to enforce 0 at the boundary
+
     el_bnd_flux = (
         _facial_flux(dcoll, u_tpair=TracePair(BTAG_ALL,
                                               interior=u_bnd, exterior=u_bc))
         + sum([_facial_flux(dcoll, u_tpair=tpair) for tpair in itp]))
     vol_flux = -op.weak_local_grad(dcoll, 0.5*u**2)[0]
-    return -op.inverse_mass(dcoll, vol_flux +
-                            op.face_mass(dcoll, el_bnd_flux))
+    return -op.inverse_mass(dcoll, vol_flux + op.face_mass(dcoll, el_bnd_flux))
 
 
 @mpi_entry_point
@@ -285,9 +268,7 @@ def rhs(t, u):
             )
 
         d = rk4_step(u, t, dt, compiled_rhs)
-        u  = force_evaluation(actx, d)
-
-#        break
+        u = force_evaluation(actx, d)
 
         t += dt
         istep += 1
diff --git a/examples/linear_wave-mpi.py b/examples/linear_wave-mpi.py
index 288bc7427..5bfd5d7dd 100644
--- a/examples/linear_wave-mpi.py
+++ b/examples/linear_wave-mpi.py
@@ -52,7 +52,6 @@ def bump(actx, nodes):
     return actx.np.exp(-(nodes[0] - 0.0)**2/0.1**2)
 
 
-
 def _flux(dcoll, c, w_tpair):
     u = w_tpair
 
@@ -112,9 +111,6 @@ def linear_wave_operator(dcoll, c, w, *, comm_tag=None):
     )
 
 
-
-
-
 def main(actx_class, use_profiling=False, use_logmgr=False, lazy: bool = False):
     """Drive the example."""
     cl_ctx = cl.create_some_context()

From f0f031881f37ec630bbe4bce8249ccf2725fc26d Mon Sep 17 00:00:00 2001
From: Tulio <tulioricci@gmail.com>
Date: Mon, 10 Apr 2023 13:36:44 -0500
Subject: [PATCH 3/5] Add comment about the derivative

---
 examples/burgers-mpi.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/burgers-mpi.py b/examples/burgers-mpi.py
index 077f4ec15..7343bf411 100644
--- a/examples/burgers-mpi.py
+++ b/examples/burgers-mpi.py
@@ -97,8 +97,11 @@ def burgers_operator(dcoll, u, u_bc, *, comm_tag=None):
         _facial_flux(dcoll, u_tpair=TracePair(BTAG_ALL,
                                               interior=u_bnd, exterior=u_bc))
         + sum([_facial_flux(dcoll, u_tpair=tpair) for tpair in itp]))
-    vol_flux = -op.weak_local_grad(dcoll, 0.5*u**2)[0]
-    return -op.inverse_mass(dcoll, vol_flux + op.face_mass(dcoll, el_bnd_flux))
+
+    # FIXME use weak_local_d_dx instead if 1D
+    # FIXME use weal_local_div instead if dim >= 2
+    vol_flux = op.weak_local_grad(dcoll, 0.5*u**2)[0]
+    return op.inverse_mass(dcoll, vol_flux - op.face_mass(dcoll, el_bnd_flux))
 
 
 @mpi_entry_point

From c837be5dd5ca9ae74696f6e46fcb94bcaa8cffb7 Mon Sep 17 00:00:00 2001
From: Tulio <tulioricci@gmail.com>
Date: Mon, 10 Apr 2023 13:37:53 -0500
Subject: [PATCH 4/5] Flake8

---
 examples/burgers-mpi.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/burgers-mpi.py b/examples/burgers-mpi.py
index 7343bf411..15d8278fe 100644
--- a/examples/burgers-mpi.py
+++ b/examples/burgers-mpi.py
@@ -47,7 +47,6 @@
 
 def utx(actx, nodes, t=0):
     """Initialize the field for Burgers Eqn."""
-    x = nodes[0]
     return actx.np.sin(2.0*np.pi*nodes[0]/2.0)
 
 #    x = nodes[0] - 1.0*t

From 0aaea53a58b8b86123b541b760a7638f2d202099 Mon Sep 17 00:00:00 2001
From: Tulio <tulioricci@gmail.com>
Date: Mon, 10 Apr 2023 22:18:18 -0500
Subject: [PATCH 5/5] Remove linear wave eq

---
 examples/linear_wave-mpi.py | 246 ------------------------------------
 1 file changed, 246 deletions(-)
 delete mode 100644 examples/linear_wave-mpi.py

diff --git a/examples/linear_wave-mpi.py b/examples/linear_wave-mpi.py
deleted file mode 100644
index 5bfd5d7dd..000000000
--- a/examples/linear_wave-mpi.py
+++ /dev/null
@@ -1,246 +0,0 @@
-"""Demonstrate linear wave equation example."""
-
-__copyright__ = "Copyright (C) 2020 University of Illinos Board of Trustees"
-
-__license__ = """
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-"""
-
-import numpy as np
-import numpy.linalg as la  # noqa
-import pyopencl as cl
-import pyopencl.array as cla  # noqa
-
-from grudge.shortcuts import make_visualizer
-import grudge.op as op
-
-from mirgecom.discretization import create_discretization_collection
-from mirgecom.integrators import rk4_step
-from mirgecom.utils import force_evaluation
-
-from mirgecom.profiling import PyOpenCLProfilingArrayContext
-
-from logpyle import IntervalTimer, set_dt
-
-from mirgecom.logging_quantities import (initialize_logmgr,
-                                         logmgr_add_cl_device_info,
-                                         logmgr_add_device_memory_usage,
-                                         logmgr_add_mempool_usage)
-
-from meshmode.mesh import BTAG_ALL, BTAG_NONE  # noqa
-from grudge.trace_pair import TracePair, interior_trace_pairs
-
-
-def bump(actx, nodes):
-    """Create a bump."""
-    return actx.np.exp(-(nodes[0] - 0.0)**2/0.1**2)
-
-
-def _flux(dcoll, c, w_tpair):
-    u = w_tpair
-
-    actx = w_tpair.int.array_context
-    normal = actx.thaw(dcoll.normal(w_tpair.dd))
-
-    flux_weak = normal[0]*u.avg
-
-    # upwind
-    flux_weak = flux_weak + 0.5*(u.ext-u.int)*normal[0]
-
-    return op.project(dcoll, w_tpair.dd, "all_faces", c*flux_weak)
-
-
-class _WaveTag:
-    pass
-
-
-def linear_wave_operator(dcoll, c, w, *, comm_tag=None):
-    """Compute the RHS of the linear wave equation.
-
-    Parameters
-    ----------
-    dcoll: grudge.discretization.DiscretizationCollection
-        the discretization collection to use
-    c: float
-        the (constant) wave speed
-    w: numpy.ndarray
-        an object array of DOF arrays, representing the state vector
-    comm_tag: Hashable
-        Tag for distributed communication
-
-    Returns
-    -------
-    numpy.ndarray
-        an object array of DOF arrays, representing the ODE RHS
-    """
-    u = w
-
-    dir_u = op.project(dcoll, "vol", BTAG_ALL, u)
-    dir_bval = dir_u
-    dir_bc = -dir_u*0.0
-
-    return -(
-        op.inverse_mass(dcoll, -c*op.weak_local_grad(dcoll, u)[0]
-        +  # noqa: W504
-        op.face_mass(dcoll,
-            _flux(dcoll, c=c,
-                  w_tpair=TracePair(BTAG_ALL, interior=dir_bval,
-                                    exterior=dir_bc))
-            + sum(
-                _flux(dcoll, c=c, w_tpair=tpair)
-                for tpair in interior_trace_pairs(
-                    dcoll, w, comm_tag=(_WaveTag, comm_tag)))
-            )
-        )
-    )
-
-
-def main(actx_class, use_profiling=False, use_logmgr=False, lazy: bool = False):
-    """Drive the example."""
-    cl_ctx = cl.create_some_context()
-
-    logmgr = initialize_logmgr(use_logmgr,
-        filename="wave.sqlite", mode="wo")
-
-    from mirgecom.simutil import get_reasonable_memory_pool
-
-    if use_profiling:
-        if lazy:
-            raise RuntimeError("Cannot run lazy with profiling.")
-        queue = cl.CommandQueue(cl_ctx,
-            properties=cl.command_queue_properties.PROFILING_ENABLE)
-
-        alloc = get_reasonable_memory_pool(cl_ctx, queue)
-        actx = PyOpenCLProfilingArrayContext(queue, allocator=alloc)
-    else:
-        queue = cl.CommandQueue(cl_ctx)
-        alloc = get_reasonable_memory_pool(cl_ctx, queue)
-
-        if lazy:
-            actx = actx_class(queue, allocator=alloc)
-        else:
-            actx = actx_class(queue, allocator=alloc,
-                                        force_device_scalars=True)
-
-    dim = 1
-    nel_1d = 100
-    from meshmode.mesh.generation import generate_regular_rect_mesh
-
-    mesh = generate_regular_rect_mesh(
-        a=(-2,)*dim,
-        b=(+2,)*dim,
-        nelements_per_axis=(nel_1d,)*dim)
-
-    order = 3
-
-    dcoll = create_discretization_collection(actx, mesh, order=order)
-    nodes = actx.thaw(dcoll.nodes())
-
-    current_cfl = 0.2
-    wave_speed = 1.0
-    from grudge.dt_utils import characteristic_lengthscales
-    nodal_dt = characteristic_lengthscales(actx, dcoll) / wave_speed
-    dt = actx.to_numpy(current_cfl * op.nodal_min(dcoll, "vol", nodal_dt))[()]
-
-    print("%d elements" % mesh.nelements)
-
-    fields = bump(actx, nodes)
-
-    if logmgr:
-        logmgr_add_cl_device_info(logmgr, queue)
-        logmgr_add_device_memory_usage(logmgr, queue)
-        logmgr_add_mempool_usage(logmgr, alloc)
-
-        logmgr.add_watches(["step.max", "t_step.max", "t_log.max"])
-
-        try:
-            logmgr.add_watches(["memory_usage_python.max", "memory_usage_gpu.max"])
-        except KeyError:
-            pass
-
-        if use_profiling:
-            logmgr.add_watches(["multiply_time.max"])
-
-        vis_timer = IntervalTimer("t_vis", "Time spent visualizing")
-        logmgr.add_quantity(vis_timer)
-
-    vis = make_visualizer(dcoll)
-
-    def rhs(t, w):
-        return linear_wave_operator(dcoll, c=wave_speed, w=w)
-
-    compiled_rhs = actx.compile(rhs)
-    fields = force_evaluation(actx, fields)
-
-    t = 0
-    t_final = 1
-    istep = 0
-    while t < t_final:
-        if logmgr:
-            logmgr.tick_before()
-
-        if istep % 10 == 0:
-            print(istep, t, actx.to_numpy(op.norm(dcoll, fields, np.inf)))
-            if use_profiling:
-                print(actx.tabulate_profiling_data())
-            vis.write_vtk_file("linear-wave-%04d.vtu" % istep,
-                    [
-                        ("u", fields),
-                        ("x", nodes[0]),
-                        ], overwrite=True)
-
-        fields = rk4_step(fields, t, dt, compiled_rhs)
-        fields = force_evaluation(actx, fields)
-
-        t += dt
-        istep += 1
-
-        if logmgr:
-            set_dt(logmgr, dt)
-            logmgr.tick_after()
-
-    vis.write_vtk_file("linear-wave-%04d.vtu" % istep,
-            [
-                ("u", fields),
-                ("x", nodes[0]),
-                ], overwrite=True)
-
-    if logmgr:
-        logmgr.close()
-
-
-if __name__ == "__main__":
-    import argparse
-    parser = argparse.ArgumentParser(description="Wave (non-MPI version)")
-    parser.add_argument("--profiling", action="store_true",
-        help="enable kernel profiling")
-    parser.add_argument("--log", action="store_true",
-        help="enable logging")
-    parser.add_argument("--lazy", action="store_true",
-        help="enable lazy evaluation")
-    args = parser.parse_args()
-
-    from grudge.array_context import get_reasonable_array_context_class
-    actx_class = get_reasonable_array_context_class(lazy=args.lazy,
-                                                    distributed=False)
-
-    main(actx_class, use_profiling=args.profiling,
-         use_logmgr=args.log, lazy=args.lazy)
-
-# vim: foldmethod=marker