diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml
index a3c23be800..343f300936 100644
--- a/.github/workflows/ci-mlir-mpi.yml
+++ b/.github/workflows/ci-mlir-mpi.yml
@@ -38,10 +38,10 @@ jobs:
       run: |
         pip install -e .[tests]
         pip install mpi4py
-        pip install git+https://github.com/xdslproject/xdsl@5500ff6d82d1a920b369615292ba507ecbf92fc9
+        pip install git+https://github.com/xdslproject/xdsl@2db819079d97d006a0d2eb51fa69fe3eecbcc547
 
     - name: Test with MPI
       run: |
         # Add mlir-opt to the path
         export PATH=/xdsl-sc/llvm-project/build/bin/:$PATH
-        pytest -m "parallel" -k "not adjoint" tests/test_xdsl_*
+        pytest -m "parallel" -k "not adjoint" tests/test_xdsl_* -vvv
diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml
index 0515757c09..3e399003a0 100644
--- a/.github/workflows/ci-mlir.yml
+++ b/.github/workflows/ci-mlir.yml
@@ -38,7 +38,7 @@ jobs:
       run: |
         pip install -e .[tests]
         pip install mpi4py
-        pip install git+https://github.com/xdslproject/xdsl@5500ff6d82d1a920b369615292ba507ecbf92fc9
+        pip install git+https://github.com/xdslproject/xdsl@2db819079d97d006a0d2eb51fa69fe3eecbcc547
         
     - name: Test no-MPI, no-Openmp
       run: |
@@ -46,7 +46,7 @@ jobs:
         export DEVITO_LANGUAGE=C
         # Add mlir-opt to the path
         export PATH=/xdsl-sc/llvm-project/build/bin/:$PATH
-        pytest -m "not parallel" -k "not adjoint" tests/test_xdsl_*
+        pytest -m "not parallel" -k "not adjoint" tests/test_xdsl_* -vvv
 
     - name: Test no-MPI, Openmp
       run: |
@@ -54,4 +54,4 @@ jobs:
           export DEVITO_LANGUAGE=openmp
           # Add mlir-opt to the path
           export PATH=/xdsl-sc/llvm-project/build/bin/:$PATH
-          pytest -m "not parallel" -k "not adjoint" tests/test_xdsl_*
+          pytest -m "not parallel" -k "not adjoint" tests/test_xdsl_* -vvv
diff --git a/devito/core/cpu.py b/devito/core/cpu.py
index a1ca16cfd8..4d7c326248 100644
--- a/devito/core/cpu.py
+++ b/devito/core/cpu.py
@@ -1,6 +1,7 @@
 from contextlib import redirect_stdout
 import io
 import os
+import sys
 import subprocess
 import ctypes
 import tempfile
@@ -9,6 +10,7 @@
 from collections import OrderedDict
 
 from functools import partial
+from typing import Iterable
 
 from devito.core.operator import CoreOperator, CustomOperator, ParTile
 from devito.exceptions import InvalidOperator
@@ -370,21 +372,42 @@ def _jit_compile(self):
                 # TODO More detailed error handling manually,
                 # instead of relying on a bash-only feature.
 
-                # xdsl-opt, get xDSL IR
-                # TODO: Remove quotes in pipeline; currently workaround with [1:-1]
-                xdsl = xDSLOptMain(args=[source_name, "-p", xdsl_pipeline[1:-1]])
+                # Run the first pipeline, mostly xDSL-centric
+                xdsl_args = [source_name,
+                             "--allow-unregistered-dialect",
+                             "-p",
+                             xdsl_pipeline[1:-1],]
+                # We use the Python API to run xDSL rather than a subprocess
+                # This avoids reimport overhead
+                xdsl = xDSLOptMain(args=xdsl_args)
                 out = io.StringIO()
+                perf("-----------------")
+                perf(f"xdsl-opt {' '.join(xdsl_args)}")
                 with redirect_stdout(out):
                     xdsl.run()
 
-                # mlir-opt
-                mlir_cmd = f'mlir-opt -p {mlir_pipeline}'
-                out = self.compile(mlir_cmd, out.getvalue())
-                #  Printer().print(out)
+                # To use as input in the next stage
+                out.seek(0)
+                # Run the second pipeline, mostly MLIR-centric
+                xdsl_mlir_args = ["--allow-unregistered-dialect",
+                                  "-p",
+                                  mlir_pipeline]
+                # We drive it through xDSL rather than an mlir-opt call for:
+                # - ability to use xDSL replacement passes in the middle
+                # - avoiding complex process management code here: xDSL provides it
+                xdsl = xDSLOptMain(args=xdsl_mlir_args)
+                out2 = io.StringIO()
+                perf("-----------------")
+                perf(f"xdsl-opt {' '.join(xdsl_mlir_args)}")
+                with redirect_stdout(out2):
+                    old_stdin = sys.stdin
+                    sys.stdin = out
+                    xdsl.run()
+                    sys.stdin = old_stdin
 
+                # mlir-translate to translate to LLVM-IR
                 mlir_translate_cmd = 'mlir-translate --mlir-to-llvmir'
-                out = self.compile(mlir_translate_cmd, out)
-                # Printer().print(out)
+                out = self.compile(mlir_translate_cmd, out2.getvalue())
 
                 # Compile with clang and get LLVM-IR
                 clang_cmd = f'{cc} {cflags} -o {self._tf.name} {self._interop_tf.name} -xir -'  # noqa
@@ -634,20 +657,42 @@ def _jit_compile(self):
 
                 # xdsl-opt, get xDSL IR
                 # TODO: Remove quotes in pipeline; currently workaround with [1:-1]
-                xdsl = xDSLOptMain(args=[source_name, "-p", xdsl_pipeline[1:-1]])
+                # Run the first pipeline, mostly xDSL-centric
+                xdsl_args = [source_name,
+                             "--allow-unregistered-dialect",
+                             "-p",
+                             xdsl_pipeline[1:-1],]
+                # We use the Python API to run xDSL rather than a subprocess
+                # This avoids reimport overhead
+                xdsl = xDSLOptMain(args=xdsl_args)
                 out = io.StringIO()
+                perf("-----------------")
+                perf(f"xdsl-opt {' '.join(xdsl_args)}")
                 with redirect_stdout(out):
                     xdsl.run()
 
-                # mlir-opt
-                mlir_cmd = f'mlir-opt -p {mlir_pipeline}'
-                out = self.compile(mlir_cmd, out.getvalue())
-
-                # Printer().print(out)
+                # To use as input in the next stage
+                out.seek(0)
+                # Run the second pipeline, mostly MLIR-centric
+                xdsl_mlir_args = ["--allow-unregistered-dialect",
+                                  "-p",
+                                  mlir_pipeline]
+                # We drive it through xDSL rather than an mlir-opt call for:
+                # - ability to use xDSL replacement passes in the middle
+                # - avoiding complex process management code here: xDSL provides it
+                xdsl = xDSLOptMain(args=xdsl_mlir_args)
+                out2 = io.StringIO()
+                perf("-----------------")
+                perf(f"xdsl-opt {' '.join(xdsl_mlir_args)}")
+                with redirect_stdout(out2):
+                    old_stdin = sys.stdin
+                    sys.stdin = out
+                    xdsl.run()
+                    sys.stdin = old_stdin
 
+                # mlir-translate to translate to LLVM-IR
                 mlir_translate_cmd = 'mlir-translate --mlir-to-llvmir'
-                out = self.compile(mlir_translate_cmd, out)
-                # Printer().print(out)
+                out = self.compile(mlir_translate_cmd, out2.getvalue())
 
                 # Compile with clang and get LLVM-IR
                 clang_cmd = f'{cc} {cflags} -o {self._tf.name} {self._interop_tf.name} -xir -'  # noqa
@@ -791,7 +836,7 @@ class Cpu64FsgOmpOperator(Cpu64FsgOperator):
 
 def generate_MLIR_CPU_PIPELINE():
     passes = [
-        "builtin.module(canonicalize",
+        "canonicalize",
         "cse",
         "loop-invariant-code-motion",
         "canonicalize",
@@ -808,15 +853,15 @@ def generate_MLIR_CPU_PIPELINE():
         "convert-func-to-llvm{use-bare-ptr-memref-call-conv}",
         "finalize-memref-to-llvm",
         "canonicalize",
-        "cse)"
+        "cse"
     ]
 
-    return generate_pipeline(passes)
+    return generate_mlir_pipeline(passes)
 
 
 def generate_MLIR_CPU_noop_PIPELINE():
     passes = [
-        "builtin.module(canonicalize",
+        "canonicalize",
         "cse",
         # "remove-dead-values",
         "canonicalize",
@@ -825,15 +870,15 @@ def generate_MLIR_CPU_noop_PIPELINE():
         "convert-math-to-llvm",
         "convert-func-to-llvm{use-bare-ptr-memref-call-conv}",
         "finalize-memref-to-llvm",
-        "canonicalize)",
+        "canonicalize",
     ]
 
-    return generate_pipeline(passes)
+    return generate_mlir_pipeline(passes)
 
 
 def generate_MLIR_OPENMP_PIPELINE():
     passes = [
-        "builtin.module(canonicalize",
+        "canonicalize",
         "cse",
         "loop-invariant-code-motion",
         "canonicalize",
@@ -858,10 +903,10 @@ def generate_MLIR_OPENMP_PIPELINE():
         # "reconcile-unrealized-casts",
         "canonicalize",
         # "print-ir",
-        "cse)"
+        "cse"
     ]
 
-    return generate_pipeline(passes)
+    return generate_mlir_pipeline(passes)
 
 
 def generate_XDSL_CPU_PIPELINE(nb_tiled_dims):
@@ -899,11 +944,16 @@ def generate_XDSL_MPI_PIPELINE(decomp, nb_tiled_dims):
     return generate_pipeline(passes)
 
 
-def generate_pipeline(passes):
+def generate_pipeline(passes: Iterable[str]):
     passes_string = ",".join(passes)
     return f'"{passes_string}"'
 
 
+def generate_mlir_pipeline(passes: Iterable[str]):
+    passes_string = ",".join(passes)
+    return f'mlir-opt[{passes_string}]'
+
+
 # small interop shim script for stuff that we don't want to implement in mlir-ir
 _INTEROP_C = """
 #include <time.h>
diff --git a/devito/core/gpu.py b/devito/core/gpu.py
index 8d4fe4f5a7..cfb0429c30 100644
--- a/devito/core/gpu.py
+++ b/devito/core/gpu.py
@@ -1,6 +1,7 @@
 from contextlib import redirect_stdout
 import io
 import os
+import sys
 from functools import partial
 from io import StringIO
 
@@ -9,7 +10,7 @@
 
 from devito.core.operator import CoreOperator, CustomOperator, ParTile
 
-from devito.core.cpu import XdslAdvOperator, generate_pipeline
+from devito.core.cpu import XdslAdvOperator, generate_mlir_pipeline, generate_pipeline
 
 from devito.exceptions import InvalidOperator
 from devito.operator.operator import rcompile
@@ -439,20 +440,42 @@ def _jit_compile(self):
 
                 # xdsl-opt, get xDSL IR
                 # TODO: Remove quotes in pipeline; currently workaround with [1:-1]
-                xdsl = xDSLOptMain(args=[source_name, "-p", xdsl_pipeline[1:-1]])
+                # Run the first pipeline, mostly xDSL-centric
+                xdsl_args = [source_name,
+                             "--allow-unregistered-dialect",
+                             "-p",
+                             xdsl_pipeline[1:-1],]
+                # We use the Python API to run xDSL rather than a subprocess
+                # This avoids reimport overhead
+                xdsl = xDSLOptMain(args=xdsl_args)
                 out = io.StringIO()
+                perf("-----------------")
+                perf(f"xdsl-opt {' '.join(xdsl_args)}")
                 with redirect_stdout(out):
                     xdsl.run()
 
-                # mlir-opt
-                mlir_cmd = f'mlir-opt -p {mlir_pipeline}'
-                out = self.compile(mlir_cmd, out.getvalue())
-
-                # Printer().print(out)
+                # To use as input in the next stage
+                out.seek(0)
+                # Run the second pipeline, mostly MLIR-centric
+                xdsl_mlir_args = ["--allow-unregistered-dialect",
+                                  "-p",
+                                  mlir_pipeline]
+                # We drive it through xDSL rather than an mlir-opt call for:
+                # - ability to use xDSL replacement passes in the middle
+                # - avoiding complex process management code here: xDSL provides it
+                xdsl = xDSLOptMain(args=xdsl_mlir_args)
+                out2 = io.StringIO()
+                perf("-----------------")
+                perf(f"xdsl-opt {' '.join(xdsl_mlir_args)}")
+                with redirect_stdout(out2):
+                    old_stdin = sys.stdin
+                    sys.stdin = out
+                    xdsl.run()
+                    sys.stdin = old_stdin
 
+                # mlir-translate to translate to LLVM-IR
                 mlir_translate_cmd = 'mlir-translate --mlir-to-llvmir'
-                out = self.compile(mlir_translate_cmd, out)
-                # Printer().print(out)
+                out = self.compile(mlir_translate_cmd, out2.getvalue())
 
                 # Compile with clang and get LLVM-IR
                 clang_cmd = f'{cc} {cflags} -o {self._tf.name} {self._interop_tf.name} -xir -'  # noqa
@@ -562,7 +585,7 @@ def generate_XDSL_GPU_PIPELINE():
 # gpu-launch-sink-index-computations seemed to have no impact
 def generate_MLIR_GPU_PIPELINE(block_sizes):
     passes = [
-        "builtin.module(test-math-algebraic-simplification",
+        "test-math-algebraic-simplification",
         f"scf-parallel-loop-tiling{{parallel-loop-tile-sizes={block_sizes}}}",
         "func.func(gpu-map-parallel-loops)",
         "convert-parallel-loops-to-gpu",
@@ -593,7 +616,7 @@ def generate_MLIR_GPU_PIPELINE(block_sizes):
         "gpu-to-llvm",
         "gpu-module-to-binary",
         "canonicalize",
-        "cse)"
+        "cse"
     ]
 
-    return generate_pipeline(passes)
+    return generate_mlir_pipeline(passes)