diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index a3c23be800..343f300936 100644 --- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -38,10 +38,10 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@5500ff6d82d1a920b369615292ba507ecbf92fc9 + pip install git+https://github.com/xdslproject/xdsl@2db819079d97d006a0d2eb51fa69fe3eecbcc547 - name: Test with MPI run: | # Add mlir-opt to the path export PATH=/xdsl-sc/llvm-project/build/bin/:$PATH - pytest -m "parallel" -k "not adjoint" tests/test_xdsl_* + pytest -m "parallel" -k "not adjoint" tests/test_xdsl_* -vvv diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index 0515757c09..3e399003a0 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -38,7 +38,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@5500ff6d82d1a920b369615292ba507ecbf92fc9 + pip install git+https://github.com/xdslproject/xdsl@2db819079d97d006a0d2eb51fa69fe3eecbcc547 - name: Test no-MPI, no-Openmp run: | @@ -46,7 +46,7 @@ jobs: export DEVITO_LANGUAGE=C # Add mlir-opt to the path export PATH=/xdsl-sc/llvm-project/build/bin/:$PATH - pytest -m "not parallel" -k "not adjoint" tests/test_xdsl_* + pytest -m "not parallel" -k "not adjoint" tests/test_xdsl_* -vvv - name: Test no-MPI, Openmp run: | @@ -54,4 +54,4 @@ jobs: export DEVITO_LANGUAGE=openmp # Add mlir-opt to the path export PATH=/xdsl-sc/llvm-project/build/bin/:$PATH - pytest -m "not parallel" -k "not adjoint" tests/test_xdsl_* + pytest -m "not parallel" -k "not adjoint" tests/test_xdsl_* -vvv diff --git a/devito/core/cpu.py b/devito/core/cpu.py index a1ca16cfd8..4d7c326248 100644 --- a/devito/core/cpu.py +++ b/devito/core/cpu.py @@ -1,6 +1,7 @@ from contextlib import redirect_stdout import io import os +import sys import subprocess import ctypes import 
tempfile @@ -9,6 +10,7 @@ from collections import OrderedDict from functools import partial +from typing import Iterable from devito.core.operator import CoreOperator, CustomOperator, ParTile from devito.exceptions import InvalidOperator @@ -370,21 +372,42 @@ def _jit_compile(self): # TODO More detailed error handling manually, # instead of relying on a bash-only feature. - # xdsl-opt, get xDSL IR - # TODO: Remove quotes in pipeline; currently workaround with [1:-1] - xdsl = xDSLOptMain(args=[source_name, "-p", xdsl_pipeline[1:-1]]) + # Run the first pipeline, mostly xDSL-centric + xdsl_args = [source_name, + "--allow-unregistered-dialect", + "-p", + xdsl_pipeline[1:-1],] + # We use the Python API to run xDSL rather than a subprocess + # This avoids reimport overhead + xdsl = xDSLOptMain(args=xdsl_args) out = io.StringIO() + perf("-----------------") + perf(f"xdsl-opt {' '.join(xdsl_args)}") with redirect_stdout(out): xdsl.run() - # mlir-opt - mlir_cmd = f'mlir-opt -p {mlir_pipeline}' - out = self.compile(mlir_cmd, out.getvalue()) - # Printer().print(out) + # To use as input in the next stage + out.seek(0) + # Run the second pipeline, mostly MLIR-centric + xdsl_mlir_args = ["--allow-unregistered-dialect", + "-p", + mlir_pipeline] + # We drive it through xDSL rather than a mlir-opt call for: + # - ability to use xDSL replacement passes in the middle + # - Avoiding complex process management code here: xDSL provides + xdsl = xDSLOptMain(args=xdsl_mlir_args) + out2 = io.StringIO() + perf("-----------------") + perf(f"xdsl-opt {' '.join(xdsl_mlir_args)}") + with redirect_stdout(out2): + old_stdin = sys.stdin + sys.stdin = out + xdsl.run() + sys.stdin = old_stdin + # mlir-translate to translate to LLVM-IR mlir_translate_cmd = 'mlir-translate --mlir-to-llvmir' - out = self.compile(mlir_translate_cmd, out) - # Printer().print(out) + out = self.compile(mlir_translate_cmd, out2.getvalue()) # Compile with clang and get LLVM-IR clang_cmd = f'{cc} {cflags} -o {self._tf.name} 
{self._interop_tf.name} -xir -' # noqa @@ -634,20 +657,42 @@ def _jit_compile(self): # xdsl-opt, get xDSL IR # TODO: Remove quotes in pipeline; currently workaround with [1:-1] - xdsl = xDSLOptMain(args=[source_name, "-p", xdsl_pipeline[1:-1]]) + # Run the first pipeline, mostly xDSL-centric + xdsl_args = [source_name, + "--allow-unregistered-dialect", + "-p", + xdsl_pipeline[1:-1],] + # We use the Python API to run xDSL rather than a subprocess + # This avoids reimport overhead + xdsl = xDSLOptMain(args=xdsl_args) out = io.StringIO() + perf("-----------------") + perf(f"xdsl-opt {' '.join(xdsl_args)}") with redirect_stdout(out): xdsl.run() - # mlir-opt - mlir_cmd = f'mlir-opt -p {mlir_pipeline}' - out = self.compile(mlir_cmd, out.getvalue()) - - # Printer().print(out) + # To use as input in the next stage + out.seek(0) + # Run the second pipeline, mostly MLIR-centric + xdsl_mlir_args = ["--allow-unregistered-dialect", + "-p", + mlir_pipeline] + # We drive it through xDSL rather than a mlir-opt call for: + # - ability to use xDSL replacement passes in the middle + # - Avoiding complex process management code here: xDSL provides + xdsl = xDSLOptMain(args=xdsl_mlir_args) + out2 = io.StringIO() + perf("-----------------") + perf(f"xdsl-opt {' '.join(xdsl_mlir_args)}") + with redirect_stdout(out2): + old_stdin = sys.stdin + sys.stdin = out + xdsl.run() + sys.stdin = old_stdin + # mlir-translate to translate to LLVM-IR mlir_translate_cmd = 'mlir-translate --mlir-to-llvmir' - out = self.compile(mlir_translate_cmd, out) - # Printer().print(out) + out = self.compile(mlir_translate_cmd, out2.getvalue()) # Compile with clang and get LLVM-IR clang_cmd = f'{cc} {cflags} -o {self._tf.name} {self._interop_tf.name} -xir -' # noqa @@ -791,7 +836,7 @@ class Cpu64FsgOmpOperator(Cpu64FsgOperator): def generate_MLIR_CPU_PIPELINE(): passes = [ - "builtin.module(canonicalize", + "canonicalize", "cse", "loop-invariant-code-motion", "canonicalize", @@ -808,15 +853,15 @@ def 
generate_MLIR_CPU_PIPELINE(): "convert-func-to-llvm{use-bare-ptr-memref-call-conv}", "finalize-memref-to-llvm", "canonicalize", - "cse)" + "cse" ] - return generate_pipeline(passes) + return generate_mlir_pipeline(passes) def generate_MLIR_CPU_noop_PIPELINE(): passes = [ - "builtin.module(canonicalize", + "canonicalize", "cse", # "remove-dead-values", "canonicalize", @@ -825,15 +870,15 @@ def generate_MLIR_CPU_noop_PIPELINE(): "convert-math-to-llvm", "convert-func-to-llvm{use-bare-ptr-memref-call-conv}", "finalize-memref-to-llvm", - "canonicalize)", + "canonicalize", ] - return generate_pipeline(passes) + return generate_mlir_pipeline(passes) def generate_MLIR_OPENMP_PIPELINE(): passes = [ - "builtin.module(canonicalize", + "canonicalize", "cse", "loop-invariant-code-motion", "canonicalize", @@ -858,10 +903,10 @@ def generate_MLIR_OPENMP_PIPELINE(): # "reconcile-unrealized-casts", "canonicalize", # "print-ir", - "cse)" + "cse" ] - return generate_pipeline(passes) + return generate_mlir_pipeline(passes) def generate_XDSL_CPU_PIPELINE(nb_tiled_dims): @@ -899,11 +944,16 @@ def generate_XDSL_MPI_PIPELINE(decomp, nb_tiled_dims): return generate_pipeline(passes) -def generate_pipeline(passes): +def generate_pipeline(passes: Iterable[str]): passes_string = ",".join(passes) return f'"{passes_string}"' +def generate_mlir_pipeline(passes: Iterable[str]): + passes_string = ",".join(passes) + return f'mlir-opt[{passes_string}]' + + # small interop shim script for stuff that we don't want to implement in mlir-ir _INTEROP_C = """ #include diff --git a/devito/core/gpu.py b/devito/core/gpu.py index 8d4fe4f5a7..cfb0429c30 100644 --- a/devito/core/gpu.py +++ b/devito/core/gpu.py @@ -1,6 +1,7 @@ from contextlib import redirect_stdout import io import os +import sys from functools import partial from io import StringIO @@ -9,7 +10,7 @@ from devito.core.operator import CoreOperator, CustomOperator, ParTile -from devito.core.cpu import XdslAdvOperator, generate_pipeline +from 
devito.core.cpu import XdslAdvOperator, generate_mlir_pipeline, generate_pipeline from devito.exceptions import InvalidOperator from devito.operator.operator import rcompile @@ -439,20 +440,42 @@ def _jit_compile(self): # xdsl-opt, get xDSL IR # TODO: Remove quotes in pipeline; currently workaround with [1:-1] - xdsl = xDSLOptMain(args=[source_name, "-p", xdsl_pipeline[1:-1]]) + # Run the first pipeline, mostly xDSL-centric + xdsl_args = [source_name, + "--allow-unregistered-dialect", + "-p", + xdsl_pipeline[1:-1],] + # We use the Python API to run xDSL rather than a subprocess + # This avoids reimport overhead + xdsl = xDSLOptMain(args=xdsl_args) out = io.StringIO() + perf("-----------------") + perf(f"xdsl-opt {' '.join(xdsl_args)}") with redirect_stdout(out): xdsl.run() - # mlir-opt - mlir_cmd = f'mlir-opt -p {mlir_pipeline}' - out = self.compile(mlir_cmd, out.getvalue()) - - # Printer().print(out) + # To use as input in the next stage + out.seek(0) + # Run the second pipeline, mostly MLIR-centric + xdsl_mlir_args = ["--allow-unregistered-dialect", + "-p", + mlir_pipeline] + # We drive it through xDSL rather than a mlir-opt call for: + # - ability to use xDSL replacement passes in the middle + # - Avoiding complex process management code here: xDSL provides + xdsl = xDSLOptMain(args=xdsl_mlir_args) + out2 = io.StringIO() + perf("-----------------") + perf(f"xdsl-opt {' '.join(xdsl_mlir_args)}") + with redirect_stdout(out2): + old_stdin = sys.stdin + sys.stdin = out + xdsl.run() + sys.stdin = old_stdin + # mlir-translate to translate to LLVM-IR mlir_translate_cmd = 'mlir-translate --mlir-to-llvmir' - out = self.compile(mlir_translate_cmd, out) - # Printer().print(out) + out = self.compile(mlir_translate_cmd, out2.getvalue()) # Compile with clang and get LLVM-IR clang_cmd = f'{cc} {cflags} -o {self._tf.name} {self._interop_tf.name} -xir -' # noqa @@ -562,7 +585,7 @@ def generate_XDSL_GPU_PIPELINE(): # gpu-launch-sink-index-computations seemed to have no impact def 
generate_MLIR_GPU_PIPELINE(block_sizes): passes = [ - "builtin.module(test-math-algebraic-simplification", + "test-math-algebraic-simplification", f"scf-parallel-loop-tiling{{parallel-loop-tile-sizes={block_sizes}}}", "func.func(gpu-map-parallel-loops)", "convert-parallel-loops-to-gpu", @@ -593,7 +616,7 @@ def generate_MLIR_GPU_PIPELINE(block_sizes): "gpu-to-llvm", "gpu-module-to-binary", "canonicalize", - "cse)" + "cse" ] - return generate_pipeline(passes) + return generate_mlir_pipeline(passes)