diff --git a/.github/workflows/buildAndTestStructured.yml b/.github/workflows/buildAndTestStructured.yml index e0234ca813bd..1f5199d4362b 100644 --- a/.github/workflows/buildAndTestStructured.yml +++ b/.github/workflows/buildAndTestStructured.yml @@ -38,7 +38,7 @@ jobs: - name: Install dependencies from apt uses: awalsh128/cache-apt-pkgs-action@v1.4.2 with: - packages: protobuf-compiler libprotobuf-dev libcurl4-gnutls-dev + packages: libcurl4-gnutls-dev version: 1.0 - name: Checkout project diff --git a/.gitmodules b/.gitmodules index 160e1bd2812c..49d6902e37b3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "experimental/iterators/third_party/llvm-project"] path = third_party/llvm-project url = https://github.com/llvm/llvm-project.git -[submodule "third_party/substrait-cpp"] - path = third_party/substrait-cpp - url = https://github.com/substrait-io/substrait-cpp.git diff --git a/CMakeLists.txt b/CMakeLists.txt index c5ed2e45ee0f..34823afe33b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,15 +5,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) "-DLLVM_EXTERNAL_STRUCTURED_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}") endif() -################################################################################ -# Set up dependencies -################################################################################ - -# Required for Substrait. v3.6.1 provided by Ubuntu 20.04 did not work due to -# an incompatibility with `-fno-rtti`, which LLVM uses, while v3.12.4 provided -# by Ubuntu 22.04 worked. Possibly some versions inbetween work as well. 
-find_package(Protobuf 3.12.0 REQUIRED) - ################################################################################ # Set some variables ################################################################################ @@ -59,11 +50,6 @@ include(TableGen) include(AddLLVM) include(AddMLIR) -################################################################################ -# Dependencies from git sub-modules -################################################################################ -add_subdirectory(third_party) - ################################################################################ # Subdirs to recurse into ################################################################################ diff --git a/README-Substrait.md b/README-Substrait.md index 347757ea8819..6d51603ef4a5 100644 --- a/README-Substrait.md +++ b/README-Substrait.md @@ -1,177 +1,4 @@ -# Substrait Dialect for MLIR +# The Substrait Dialect for MLIR has moved -This project consist of building an input/output dialect in -[MLIR](https://mlir.llvm.org/) for [Substrait](https://substrait.io/), the -cross-language serialization format of database query plans (akin to an -intermediate representation/IR for database queries). The immediate goal is to -create common infrastructure that can be used to implement consumers, producers, -optimizers, and transpilers of Substrait; the more transcending goal is to study -the viability of using modern, general-purpose compiler infrastructure to -implement database query compilers. - -## Motivation - -Substrait defines a serialization format for data-intensive compute operations -similar to relational algebra as they typically occur in database query plans -and similar systems, i.e., an exchange format for database queries. 
This allows -to separate the development of user frontends such as dataframe libraries or SQL -dialects (aka "Substrait producers") from that of backends such as database -engines (aka "Substrait consumers") and, thus, to interoperate more easily -between different data processing systems. - -While Substrait has significant momentum and finds increasing -[adoption](https://substrait.io/community/powered_by/) in mature systems, it is -only concerned with implementing the *serialization format* of query plans, and -leaves the *handling* of that format and, hence, the *in-memory format* or -*intermediate representation* (IR) of plans up to the systems that adopt it. -This will likely lead to repeated implementation effort for everything else -required to deal with that intermediate representation, including -serialization/desiralization to and from text and other formats, a host-language -representation of the IR such as native classes, error and location tracking, -rewrite engines, rewrite rules, and pass management, common optimizations such -as common sub-expression elimination, and similar. - -This project aims to create a base for any system dealing with Substrait by -building a "dialect" for Substrait in [MLIR](https://mlir.llvm.org/). In a way, -it aims to build an *in-memory* format for the concepts defined by Substrait, -for which the latter only describe their *serialization format*. MLIR is a -generic compiler framework providing infrastructure for writing compilers from -any domain, is part of the LLVM ecosystem, and has an [active -community](https://discourse.llvm.org/c/mlir/31) with -[adoption](https://mlir.llvm.org/users/) from researchers and industry across -many domains. It makes it easy to add new IR consisting of domain-specific -operations, types, attributes, etc., which are organized in dialects (either -in-tree and out-of-tree), as well as rewrites, passes, conversions, -translations, etc. on those dialects. 
Creating a Substrait dialect and a number -of common related transformations in such a mature framework has the potential -to eliminate some of the repeated effort described above and, thus, to ease and -eventually increase adoption of Substrait. By extension, building out a dialect -for Substrait can show that MLIR is a viable base for any database-style query -compiler. - -## Target Use Cases - -The aim of the Substrait dialect is to support all of the following use cases: - -* Implement the **translation** of the IR of a particular system to or from - Substrait by converting it to or from the Substrait dialect (rather than - Substrait's protobuf messages) and then use the serialization/deserializing - routines from this project. -* Use the Substrait dialect as the **sole in-memory format** for the IR of a - particular system, e.g., parsing some frontend format into its own dialect - and then converting that into the Substrait dialect for export or converting - from the Substrait dialect for import and then translating that into an - execution plan. -* Implement **simplifying and "canonicalizing" transformations** of Substrait - plans such as common sub-expression elimination, dead code elimination, - sub-query/common table-expression inlining, selection and projection - push-down, etc., for example, as part of a producer, consumer, or transpiler. -* Implement **"compatibility rewrites"** that transforms plans that using - features that are unsupported by a particular consumer into equivalent plans - using features that it does support, for example, as part of a producer, - consumer, or transpiler. -* [Stretch] Implement a full-blow *query optimizer* using the dialect for both - logical and physical plans. It is not clear whether this should be done with - this dialect or rather one or two additional ones that are specifically - designed with query optimization in mind. 
- -## Design Rationale - -The main objective of the Substrait dialect is to allow handling Substrait plans -in MLIR: it replicates the components of Substrait plans as a dialect in order -to be able to tap into MLIR infrastructure. In the [taxonomy of Niu and -Amini](https://www.youtube.com/watch?v=hIt6J1_E21c&t=795s), this means that the -Substrait dialect is both an "input" and an "output" dialect for Substrait. As -such, there is only little freedom in designing the dialect. To guide the design -of the few remaining choices, we shall follow the following rationale (from most -important to least important): - -* Every valid Substrait plan MUST be representable in the dialect. -* Every valid Substrait plan MUST round-trip through the dialect to the same - plan as the input. This includes names and ordering. -* The import routine MUST be able to report all constraint violations of - Substrait plans (such as type mismatches, dangling references, etc.). -* The dialect MAY be able to represent programs that do not correspond to valid - Substrait plans. It MAY be impossible to export those to Substrait. For - example, this allows to represent DAGs of operators rather than just trees. -* Every valid program in the Substrait dialect that can be exported to Substrait - MUST round-trip through Substrait to a *semantically* equivalent program but - MAY be different in terms of names, ordering, used operations, attributes, - etc. -* The dialect SHOULD be understood easily by anyone familiar with Substrait. In - particular, the dialect SHOULD use the same terminilogy as the Substrait - specification wherever applicable. -* The dialect SHOULD follow MLIR conventions, idioms, and best practices. -* The dialect SHOULD reuse types, attributes, operations, and interfaces of - upstream dialects wherever applicable. -* The dialect SHOULD allow simple optimizations and rewrites of Substrait - plans without requiring other dialects. 
-* The serialization of the dialect (aka its "assembly") MAY change over time. - (In other words, the dialect is not meant as an exchange format between - systems -- that's what Substrait is for.) - -## Features (Inherited by MLIR) - -MLIR provides infrastructure for virtually all aspects of writing a compiler. -The following is a list of features that we inherit by using MLIR: - -* Mostly declarative approach to defining relations and expressions (via - [ODS](https://mlir.llvm.org/docs/DefiningDialects/Operations/)/tablegen). -* Documentation generation from declared relations and expressions (via - [ODS](https://mlir.llvm.org/docs/DefiningDialects/Operations/#operation-documentation)). -* Declarative serialization/parsing to/from human-readable text representation - (via [custom - assembly](https://mlir.llvm.org/docs/DefiningDialects/Operations/#declarative-assembly-format)). -* Syntax high-lighting, auto-complete, as-you-type diagnostics, code navigation, - etc. for the MLIR text format (via an [LSP - server](https://mlir.llvm.org/docs/Tools/MLIRLSP/)). -* (Partially declarative) type deduction framework (via [ODS - constraints](https://mlir.llvm.org/docs/DefiningDialects/Operations/#constraints) - or C++ - [interface](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Interfaces/InferTypeOpInterface.td) - implementations). -* (Partially declarative) verification of arbitrary consistency constraints, - declarative (via [ODS - constraints](https://mlir.llvm.org/docs/DefiningDialects/Operations/#constraints)) - or imperative (via [C++ - verifiers](https://mlir.llvm.org/docs/DefiningDialects/Operations/#custom-verifier-code)). -* Mostly declarative pass management (via - [tablegen](https://mlir.llvm.org/docs/PassManagement/#declarative-pass-specification)). -* Versatile infrastructure for pattern-based rewriting (via - [DRR](https://mlir.llvm.org/docs/DeclarativeRewrites/) and [C++ - classes](https://mlir.llvm.org/docs/PatternRewriter/)). 
-* Powerful manipulation of imperative handling, creation, and modification of IR - using [native - classes](https://mlir.llvm.org/docs/Tutorials/Toy/Ch-2/#op-vs-operation-using-mlir-operations) - for operations, types, and attributes, - [walkers](https://mlir.llvm.org/docs/Tutorials/UnderstandingTheIRStructure/#walkers), - [builders](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/IR/Builders.h), - (IR) [interfaces](https://mlir.llvm.org/docs/Interfaces/), etc. (via ODS and - C++ infrastructure). -* Powerful - [location](https://mlir.llvm.org/docs/Dialects/Builtin/#location-attributes) - tracking and location-based error reporting. -* Generated [Python bindings](https://mlir.llvm.org/docs/Bindings/Python/) of IR - components, passes, and generic infrastructure (via ODS). -* Powerful command line argument handling and customizable implementation of - typical [tools](https://github.com/llvm/llvm-project/tree/main/mlir/tools) - (`X-opt`, `X-translate`, `X-lsp-server`, ...). -* [Testing infrastructure](https://mlir.llvm.org/getting_started/TestingGuide/) - that is optimized for compilers (via `lit` and `FileCheck`). -* A collection of [common types and - attributes](https://mlir.llvm.org/docs/Dialects/Builtin/) as well as - [dialects](https://mlir.llvm.org/docs/Dialects/) (i.e., operations) for more - or less generic purposes that can be used in or combined with custom dialects - and that come with [transformations](https://mlir.llvm.org/docs/Passes/) on - and [conversions](https://mlir.llvm.org/docs/DialectConversion/) to/from other - dialects. -* A collection of - [interfaces](https://github.com/llvm/llvm-project/tree/main/mlir/include/mlir/Interfaces) - and transformation passes on those interfaces, which allows to extend existing - transformations to new dialects easily. -* A support library with efficient data structures, platform-independent file - system abstraction, string utilities, etc. 
(via - [MLIR](https://github.com/llvm/llvm-project/tree/main/mlir/include/mlir/Support) - and - [LLVM](https://github.com/llvm/llvm-project/tree/main/llvm/include/llvm/Support) - support libraries). +The Substrait Dialect for MLIR has moved to the official GitHub organization at +[https://github.com/substrait-io/substrait-mlir-contrib](https://github.com/substrait-io/substrait-mlir-contrib). diff --git a/README.md b/README.md index 3ac8b1a3b506..15b6ce89f168 100644 --- a/README.md +++ b/README.md @@ -22,12 +22,17 @@ The repository currently houses the following projects: * The [Iterators](README-Iterators.md) dialect: database-style iterators for expressing computations on streams of data. -* The [Substrait](README-Substrait.md) dialect: an input/output dialect for - [Substrait](https://substrait.io/), the cross-language serialization format - of database query plans. * The [Tuple](include/structured/Dialect/Tuple/): ops for manipulation of built-in tuples (used by the Iterators dialect). +The repository no longer houses the following projects: + +* The Substrait dialect: an input/output dialect for + [Substrait](https://substrait.io/), the cross-language serialization format + of database query plans. This project now lives in the official GitHub + organization at + [https://github.com/substrait-io/substrait-mlir-contrib](https://github.com/substrait-io/substrait-mlir-contrib). 
+ ## Build Instructions This project builds as part of the LLVM External Projects facility (see diff --git a/include/structured-c/Dialects.h b/include/structured-c/Dialects.h index c18bcb920202..3942828ba68a 100644 --- a/include/structured-c/Dialects.h +++ b/include/structured-c/Dialects.h @@ -28,35 +28,6 @@ bool mlirTypeIsAIteratorsStreamType(MlirType type); MLIR_CAPI_EXPORTED MlirType mlirIteratorsStreamTypeGet(MlirContext context, MlirType elementType); -//===----------------------------------------------------------------------===// -// Substrait dialect -//===----------------------------------------------------------------------===// - -MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(Substrait, substrait); - -/// Serialization/deserialization format for exporting/importing Substrait -/// plans. This corresponds to `::mlir::substrait::SerdeFormat`. -typedef enum MlirSubstraitSerdeFormat { - MlirSubstraitTextSerdeFormat, - MlirSubstraitBinarySerdeFormat, - MlirSubstraitJsonSerdeFormat, - MlirSubstraitPrettyJsonSerdeFormat -} MlirSubstraitSerdeFormat; - -/// Imports a `Plan` message from `input`, which must be in the specified -/// serialization format. Returns a null module and emits diagnostics in case of -/// an error. -MLIR_CAPI_EXPORTED -MlirModule mlirSubstraitImportPlan(MlirContext context, MlirStringRef input, - MlirSubstraitSerdeFormat format); - -/// Exports the provided `substrait.plan` or `builtin.module` op to protobuf in -/// the specified serialization format stored in the value of a `StringAttr`. -/// Returns a null attribute and emits diagnostics in case of an error. 
-MLIR_CAPI_EXPORTED -MlirAttribute mlirSubstraitExportPlan(MlirOperation op, - MlirSubstraitSerdeFormat format); - //===----------------------------------------------------------------------===// // Tabular dialect and types //===----------------------------------------------------------------------===// diff --git a/include/structured/Dialect/CMakeLists.txt b/include/structured/Dialect/CMakeLists.txt index 3550d02e8d7e..bc38b7c2b7bc 100644 --- a/include/structured/Dialect/CMakeLists.txt +++ b/include/structured/Dialect/CMakeLists.txt @@ -1,4 +1,3 @@ add_subdirectory(Iterators) -add_subdirectory(Substrait) add_subdirectory(Tabular) add_subdirectory(Tuple) diff --git a/include/structured/Dialect/Substrait/CMakeLists.txt b/include/structured/Dialect/Substrait/CMakeLists.txt deleted file mode 100644 index 9f57627c321f..000000000000 --- a/include/structured/Dialect/Substrait/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_subdirectory(IR) -add_subdirectory(Transforms) diff --git a/include/structured/Dialect/Substrait/IR/CMakeLists.txt b/include/structured/Dialect/Substrait/IR/CMakeLists.txt deleted file mode 100644 index 90467123d09a..000000000000 --- a/include/structured/Dialect/Substrait/IR/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -add_mlir_dialect(SubstraitOps substrait) -add_dependencies(MLIRSubstraitDialect MLIRSubstraitOpsIncGen) - -set(LLVM_TARGET_DEFINITIONS SubstraitInterfaces.td) -mlir_tablegen(SubstraitOpInterfaces.h.inc -gen-op-interface-decls) -mlir_tablegen(SubstraitOpInterfaces.cpp.inc -gen-op-interface-defs) -mlir_tablegen(SubstraitTypeInterfaces.h.inc -gen-type-interface-decls) -mlir_tablegen(SubstraitTypeInterfaces.cpp.inc -gen-type-interface-defs) -add_public_tablegen_target(MLIRSubstraitInterfacesIncGen) -add_dependencies(MLIRSubstraitDialect MLIRSubstraitInterfacesIncGen) - -add_dependencies(mlir-headers - MLIRSubstraitOpsIncGen -) diff --git a/include/structured/Dialect/Substrait/IR/Substrait.h 
b/include/structured/Dialect/Substrait/IR/Substrait.h deleted file mode 100644 index 089046c0f7a3..000000000000 --- a/include/structured/Dialect/Substrait/IR/Substrait.h +++ /dev/null @@ -1,29 +0,0 @@ -//===-- Substrait.h - Substrait dialect -------------------------*- C++ -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef STRUCTURED_DIALECT_SUBSTRAIT_IR_SUBSTRAIT_H -#define STRUCTURED_DIALECT_SUBSTRAIT_IR_SUBSTRAIT_H - -#include "mlir/Dialect/Func/IR/FuncOps.h" // IWYU: keep -#include "mlir/IR/Dialect.h" // IWYU: keep -#include "mlir/IR/OpImplementation.h" // IWYU: keep -#include "mlir/IR/SymbolTable.h" // IWYU: keep -#include "mlir/Interfaces/InferTypeOpInterface.h" // IWYU: keep - -#include "structured/Dialect/Substrait/IR/SubstraitOpsDialect.h.inc" // IWYU: export - -#include "structured/Dialect/Substrait/IR/SubstraitOpInterfaces.h.inc" // IWYU: export -#include "structured/Dialect/Substrait/IR/SubstraitTypeInterfaces.h.inc" // IWYU: export - -#define GET_TYPEDEF_CLASSES -#include "structured/Dialect/Substrait/IR/SubstraitOpsTypes.h.inc" // IWYU: export - -#define GET_OP_CLASSES -#include "structured/Dialect/Substrait/IR/SubstraitOps.h.inc" // IWYU: export - -#endif // STRUCTURED_DIALECT_SUBSTRAIT_IR_SUBSTRAIT_H diff --git a/include/structured/Dialect/Substrait/IR/SubstraitDialect.td b/include/structured/Dialect/Substrait/IR/SubstraitDialect.td deleted file mode 100644 index 3589c43d0561..000000000000 --- a/include/structured/Dialect/Substrait/IR/SubstraitDialect.td +++ /dev/null @@ -1,39 +0,0 @@ -//===-- SubstraitDialect.td - Substrait dialect ------------*- tablegen -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITDIALECT -#define SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITDIALECT - -include "mlir/IR/OpBase.td" - -//===----------------------------------------------------------------------===// -// Dialect definition -//===----------------------------------------------------------------------===// - -def Substrait_Dialect : Dialect { - let name = "substrait"; - let cppNamespace = "::mlir::substrait"; - let summary = "Dialect for representing Substrait plans in MLIR."; - let description = [{ - This dialect is intented to represent [Substrait](https://substrait.io/) - query plans for relational algebra in MLIR. This may be useful for both - producers and consumers, which may use this dialect as a result or input - dialect of an MLIR-based pipeline, respectively, or for implementing - rewrites on Substrait that optimize or legalize plans between existing - Substraits producers and consumers. - - The ops and types in this dialect have an approximate one-to-one - correspondance with the specification and the - [protobuf](https://github.com/substrait-io/substrait/tree/main/proto/substrait) - message types. The correspondance is only approximate since it is often - more natural in MLIR to represent several message types as a single op and - express message sub-types with interfaces instead. 
- }]; -} - -#endif // SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITDIALECT diff --git a/include/structured/Dialect/Substrait/IR/SubstraitInterfaces.td b/include/structured/Dialect/Substrait/IR/SubstraitInterfaces.td deleted file mode 100644 index 141d1a5acd7d..000000000000 --- a/include/structured/Dialect/Substrait/IR/SubstraitInterfaces.td +++ /dev/null @@ -1,34 +0,0 @@ -//===-- SubstraitInterfaces.td - Substrait interfaces ------*- tablegen -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITINTERFACES -#define SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITINTERFACES - -include "mlir/IR/OpBase.td" - -def Substrait_ExpressionOpInterface : OpInterface<"ExpressionOpInterface"> { - let description = [{ - Interface for any expression in a Substrait plan. This corresponds to an - `Expression` message, which only consists of the `rex_type` field, which, in - turn, holds a more specialized message with the information specific to the - concrete expression. - }]; - let cppNamespace = "::mlir::substrait"; -} - -def Substrait_RelOpInterface : OpInterface<"RelOpInterface"> { - let description = [{ - Interface for any relational operation in a Substrait plan. This corresponds - to a `Rel` message, which only consists of the `rel_type` field, which, in - turn, holds a more specialized message with the information specific to the - concrete relational operation. 
- }]; - let cppNamespace = "::mlir::substrait"; -} - -#endif // SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITINTERFACES diff --git a/include/structured/Dialect/Substrait/IR/SubstraitOps.td b/include/structured/Dialect/Substrait/IR/SubstraitOps.td deleted file mode 100644 index 6eeb62791d7c..000000000000 --- a/include/structured/Dialect/Substrait/IR/SubstraitOps.td +++ /dev/null @@ -1,528 +0,0 @@ -//===-- SubstraitOps.td - Substrait operations definitions -*- tablegen -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITOPS -#define SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITOPS - -include "structured/Dialect/Substrait/IR/SubstraitDialect.td" -include "structured/Dialect/Substrait/IR/SubstraitInterfaces.td" -include "structured/Dialect/Substrait/IR/SubstraitTypes.td" -include "mlir/Interfaces/InferTypeOpInterface.td" -include "mlir/Interfaces/SideEffectInterfaces.td" -include "mlir/IR/BuiltinAttributes.td" -include "mlir/IR/CommonAttrConstraints.td" -include "mlir/IR/OpBase.td" -include "mlir/IR/SymbolInterfaces.td" - -class Substrait_Op traits = []> : - Op { -} - -//===----------------------------------------------------------------------===// -// Constraints -//===----------------------------------------------------------------------===// - -class RegionOf : Region< - Concat<"::llvm::all_of($_self.getOps(), [](::mlir::Operation &op) { " - "return ", - SubstLeaves<"$_self", "op", condition.predicate>, - "; })">, - "region where each child op is " # condition.summary>; - -class IsOp : Constraint< - CPred<"::llvm::isa<" # opType # ">($_self)">, - "op of type '" # opType # "'">; - -class AnyOf opTypes, string summary = ""> : Constraint< - Or, - !if(!eq(summary, ""), - !interleave(!foreach(t, 
opTypes, t.summary), " or "), - summary)>; - -def StringArrayAttr : - TypedArrayAttrBase { - let storageType = [{ ::mlir::ArrayAttr }]; -} - -//===----------------------------------------------------------------------===// -// Extensions -//===----------------------------------------------------------------------===// -// The definitions in this section are related to the extension messages. -// See https://substrait.io/serialization/binary_serialization/ and -// https://github.com/substrait-io/substrait/blob/main/proto/substrait/extensions/extensions.proto. -//===----------------------------------------------------------------------===// - -def Substrait_ExtensionUriOp : Substrait_Op<"extension_uri", [ - Symbol - ]> { - let summary = "Declares a simple extension URI"; - let description = [{ - This op represents the `SimpleExtensionURI` message type of Substrait. It is - a `Symbol` op, so it can be looked up in the symbol table of the plan it is - contained in. - - Example code: - - ```mlir - substrait.plan version 0 : 42 : 1 { - extension_uri @uri at "http://some.url/with/extensions.yml" - extension_function @function at @uri["func1"] - // ... - } - ``` - }]; - let arguments = (ins - SymbolNameAttr:$sym_name, // corresponds to `anchor` - StrAttr:$uri - ); - let assemblyFormat = "$sym_name `at` $uri attr-dict"; -} - -class Substrait_ExtensionOp traits = []> : - Substrait_Op<"extension_" # mnemonic, traits # [ - DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods - ]> { - let description = [{ - This op represents the `SimpleExtensionDeclaration` message type of - Substrait along with the `Extension}] - # snakeCaseToCamelCase.ret # - [{` message type in the `mapping_type` case. It is both a `Symbol` op, so it - can be looked up in the symbol table of the plan it is contained in. - Conversely, its symbol reference `uri` must refer to an extension URI op - in the nearest symbol table. 
- }]; - let arguments = (ins - SymbolNameAttr:$sym_name, // corresponds to `anchor` - FlatSymbolRefAttr:$uri, - StrAttr:$name - ); - let assemblyFormat = "$sym_name `at` $uri `[` $name `]` attr-dict"; - let extraClassDefinition = [{ - /// Implement `SymbolOpInterface`. - ::mlir::LogicalResult $cppClass::verifySymbolUses( - mlir::SymbolTableCollection &symbolTables) { - if (!symbolTables.lookupNearestSymbolFrom(*this, - getUriAttr())) - return emitOpError() << "refers to " << getUriAttr() - << ", which is not a valid 'uri' op"; - return success(); - } - }]; -} - -def Substrait_ExtensionFunctionOp : Substrait_ExtensionOp<"function"> { - let summary = "Declares a simple extension function"; -} - -def Substrait_ExtensionTypeOp : Substrait_ExtensionOp<"type"> { - let summary = "Declares a simple extension type"; -} - -def Substrait_ExtensionTypeVariationOp : - Substrait_ExtensionOp<"type_variation"> { - let summary = "Declares a simple extension type variation"; -} - -//===----------------------------------------------------------------------===// -// Plan -//===----------------------------------------------------------------------===// -// The definitions in this section are related to the top-level `Plan` message. -// See https://substrait.io/serialization/binary_serialization/ and -// https://github.com/substrait-io/substrait/blob/main/proto/substrait/plan.proto. 
-//===----------------------------------------------------------------------===// - -def PlanBodyOp : AnyOf<[ - IsOp<"::mlir::substrait::PlanRelOp">, - IsOp<"::mlir::substrait::ExtensionUriOp">, - IsOp<"::mlir::substrait::ExtensionFunctionOp">, - IsOp<"::mlir::substrait::ExtensionTypeOp">, - IsOp<"::mlir::substrait::ExtensionTypeVariationOp">, - ]>; - -def Substrait_PlanOp : Substrait_Op<"plan", [ - DeclareOpInterfaceMethods, - NoTerminator, NoRegionArguments, SingleBlock, SymbolTable - ]> { - let summary = "Represents a Substrait plan"; - let description = [{ - This op represents the `Plan` message type of Substrait. It carries the - version information inline as attributes, so it also subsumes the `Version` - message type. The body of the op consists of the `relation`s and the - function and type extensions defined in the plan. - }]; - let arguments = (ins - UI32Attr:$major_number, - UI32Attr:$minor_number, - UI32Attr:$patch_number, - DefaultValuedAttr:$git_hash, - DefaultValuedAttr:$producer - ); - let regions = (region RegionOf:$body); - let assemblyFormat = [{ - `version` $major_number `:` $minor_number `:` $patch_number - (`git_hash` $git_hash^)? (`producer` $producer^)? - attr-dict-with-keyword $body - }]; - let builders = [ - OpBuilder<(ins "uint32_t":$major, "uint32_t":$minor, "uint32_t":$patch), [{ - build($_builder, $_state, major, minor, patch, - StringAttr(), StringAttr()); - }]> - ]; - let extraClassDefinition = [{ - /// Implement OpAsmOpInterface. 
- ::llvm::StringRef $cppClass::getDefaultDialect() { - return SubstraitDialect::getDialectNamespace(); - } - }]; -} - -def RelationBodyOp : AnyOf<[ - IsOp<"::mlir::substrait::RelOpInterface">, - IsOp<"::mlir::substrait::YieldOp"> - ]>; - -def Substrait_PlanRelOp : Substrait_Op<"relation", [ - DeclareOpInterfaceMethods, - HasParent<"::mlir::substrait::PlanOp">, - SingleBlockImplicitTerminator<"::mlir::substrait::YieldOp">, - NoRegionArguments - ]> { - let summary = "Represents a query tree in a Substrait plan"; - let description = [{ - Represents a `PlanRel` message, which is used in the `relations` field of - the `Plan` message. The same op can represent either the `Rel`, in which - case the `fieldNames` attribute is not set, or the `RootRel` case, in which - case the `fieldNames` attribute corresponds to the `RelRoot.names` field. - The body of this op contains various `RelOpInterface` ops (corresponding to - the `Rel` message type) producing SSA values and the one being yielded - reprents the root of the query tree that this op contains. - }]; - let arguments = (ins OptionalAttr:$fieldNames); - let regions = (region RegionOf:$body); - let assemblyFormat = "(`as` $fieldNames^)? attr-dict-with-keyword $body"; - let hasRegionVerifier = 1; - let builders = [ - OpBuilder<(ins ), [{ - build($_builder, $_state, ArrayAttr()); - }]> - ]; - let extraClassDefinition = [{ - /// Implement OpAsmOpInterface. 
- ::llvm::StringRef $cppClass::getDefaultDialect() { - return SubstraitDialect::getDialectNamespace(); - } - }]; -} - -def Substrait_YieldOp : Substrait_Op<"yield", [ - Terminator, - ParentOneOf<[ - "::mlir::substrait::FilterOp", - "::mlir::substrait::PlanRelOp", - "::mlir::substrait::ProjectOp" - ]> - ]> { - let summary = "Yields the result of a `PlanRelOp`"; - let arguments = (ins Variadic:$value); - let assemblyFormat = "attr-dict ($value^ `:` type($value))?"; - let builders = [OpBuilder<(ins), [{ /* do nothing */ }]>]; -} - -//===----------------------------------------------------------------------===// -// Expressions -//===----------------------------------------------------------------------===// -// The definitions in this section are related to the various `Expression` -// message types. See https://substrait.io/expressions/field_references/ and -// https://github.com/substrait-io/substrait/blob/main/proto/substrait/algebra.proto. -//===----------------------------------------------------------------------===// - -// TODO(ingomueller): Make this a proper base for expressions. -class Substrait_ExpressionOp traits = []> : - Substrait_Op; - -def Substrait_FieldReferenceOp : Substrait_ExpressionOp<"field_reference", [ - DeclareOpInterfaceMethods - ]> { - let summary = "Field reference expression"; - let description = [{ - Represents a `FieldReference` message together with all messages it contains - and the `Expression` message it is contained in. - - Example: - - ```mlir - %0 = ... - %1 = field_reference %0[0] : tuple - // %1 is of type `si32` - ``` - }]; - // TODO(ingomueller): extend to other types, map access, and masked references. 
- let arguments = (ins - Substrait_ContainerType:$container, - DenseI64ArrayAttr:$position - ); - let results = (outs Substrait_FieldType:$result); - let assemblyFormat = [{ - $container `` $position attr-dict `:` type($container) - }]; -} - -def Substrait_LiteralOp : Substrait_ExpressionOp<"literal", [ - DeclareOpInterfaceMethods - ]> { - let summary = "Literal expression"; - let description = [{ - Represents a `Literal` message together with all messages it contains and - the `Expression` message it is contained in. - - Example: - - ```mlir - %0 = literal -1 : si1 - ``` - }]; - // TODO(ingomueller): extend to other types. - let arguments = (ins Substrait_AtomicAttribute:$value); - let results = (outs Substrait_AtomicType:$result); - let assemblyFormat = "$value attr-dict"; -} - -def Substrait_CallOp : Substrait_ExpressionOp<"call", [ - DeclareOpInterfaceMethods, - ]> { - let summary = "Function call expression"; - let description = [{ - Represents a `ScalarFunction` message (or, in the future, other `*Function` - messages) together with all messages it contains and the `Expression` - message it is contained in. - - Currently, the specification of the function, which is in an external YAML - file, is not taken into account, for example, to verify whether a matching - overload exists or to verify/compute the result type. - - Example: - - ```mlir - extension_uri @extension at "http://some.url/with/extensions.yml" - extension_function @function at @extension["somefunc"] - relation { - // ... - %1 = call @function(%0) : (tuple) -> si1 - // ... - } - ``` - }]; - // TODO(ingomueller): Add `FunctionOptions`. - // TODO(ingomueller): Add support for `enum` and `type` argument types. 
- let arguments = (ins - FlatSymbolRefAttr:$callee, - Variadic:$args - ); - let results = (outs Substrait_FieldType:$result); - let assemblyFormat = [{ - $callee `(` $args `)` attr-dict `:` `(` type($args) `)` `->` type($result) - }]; -} - -//===----------------------------------------------------------------------===// -// Relations -//===----------------------------------------------------------------------===// -// The definitions in this section are related to the various `Rel` message -// types. See https://substrait.io/relations/basics/ and -// https://github.com/substrait-io/substrait/blob/main/proto/substrait/algebra.proto. -//===----------------------------------------------------------------------===// - -/// Attaches all traits that ops representing a `Rel` message type should have. -class Substrait_RelOp traits = []> : - Substrait_Op, - SubstLeaves<"$_self", "$_op.getResult(0).getType()", - Substrait_Relation.predicate> - ]>> - ]>; - -def Substrait_CrossOp : Substrait_RelOp<"cross", [ - DeclareOpInterfaceMethods - ]> { - let summary = "Cross product operation"; - let description = [{ - Represents a `CrossRel` message together with the `RelCommon` and left and - right `Rel` messages it contains. - - Example: - - ```mlir - %0 = ... - %1 = ... - %2 = cross %0 x %1 : tuple x tuple -> tuple - ``` - }]; - let arguments = (ins - Substrait_Relation:$left, - Substrait_Relation:$right - ); - let results = (outs Substrait_Relation:$result); - let assemblyFormat = [{ - $left `x` $right attr-dict `:` type($left) `x` type($right) - }]; -} - -def Substrait_EmitOp : Substrait_RelOp<"emit", [ - DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods - ]> { - let summary = "Projection (a.k.a. 'emit') as dedicated operation"; - let description = [{ - Represents the `Emit` message of the `emit_kind` field in the `RelCommon` - message. 
While projection is inlined into all relations in the protobuf - format, this op separates out this functionality in a dedicated op in order - to simplify rewriting. - - Example: - - ```mlir - %0 = ... - %1 = emit [2, 1] from %0 : tuple -> tuple - ``` - }]; - let arguments = (ins - Substrait_Relation:$input, - I64ArrayAttr:$mapping - ); - let results = (outs Substrait_Relation:$result); - let assemblyFormat = [{ - $mapping `from` $input attr-dict `:` type($input) `->` type($result) - }]; - let hasFolder = 1; - let extraClassDefinition = [{ - /// Implement OpAsmOpInterface. - ::llvm::StringRef $cppClass::getDefaultDialect() { - return SubstraitDialect::getDialectNamespace(); - } - }]; -} - -def Substrait_FilterOp : Substrait_RelOp<"filter", [ - SingleBlockImplicitTerminator<"::mlir::substrait::YieldOp">, - DeclareOpInterfaceMethods, - SameOperandsAndResultType - ]> { - let summary = "Filter operation"; - let description = [{ - Represents a `FilterRel` message together with the `RelCommon`, input `Rel`, - and `Expression` messages it contains. - - Example: - - ```mlir - %0 = ... - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - %true = literal -1 : si1 - yield %true : si1 - } - ``` - }]; - let arguments = (ins Substrait_Relation:$input); - let regions = (region AnyRegion:$condition); - let results = (outs Substrait_Relation:$result); - // TODO(ingomueller): We could elide/shorten the block argument from the - // assembly by writing custom printers/parsers similar to - // `scf.for` etc. - let assemblyFormat = [{ - $input attr-dict `:` type($input) $condition - }]; - let hasRegionVerifier = 1; - let extraClassDefinition = [{ - /// Implement OpAsmOpInterface. 
- ::llvm::StringRef $cppClass::getDefaultDialect() { - return SubstraitDialect::getDialectNamespace(); - } - }]; -} - -def Substrait_NamedTableOp : Substrait_RelOp<"named_table", [ - ]> { - let summary = "Read operation of a named table"; - let description = [{ - Represents a `NamedTable` message together with the `ReadRel` and `Rel` - messages that contain it. - - Example: - - ```mlir - %0 = named_table @t1 as ["a"] : tuple - ``` - }]; - // TODO(ingomueller): Maybe the result names should be part of a to-be-created - // `NamedStruct` type? - let arguments = (ins - SymbolRefAttr:$table_name, - StringArrayAttr:$field_names - ); - let results = (outs Substrait_Relation:$result); - let assemblyFormat = [{ - $table_name `as` $field_names attr-dict `:` type($result) - }]; - let hasVerifier = true; -} - -def Substrait_ProjectOp : Substrait_RelOp<"project", [ - SingleBlockImplicitTerminator<"::mlir::substrait::YieldOp">, - DeclareOpInterfaceMethods - ]> { - let summary = "Project operation"; - let description = [{ - Represents a `ProjectRel` message together with the `RelCommon`, input - `Rel`, and `Expression` messages it contains. While in protobuf the - different `Expression` messages are distinct trees, the `project` op has - a single `expression` region with one terminating `yield` and the values - yielded by that terminator correspond to the expressions. Each individual - `Expression` thus corresponds to the whole use-def tree of the corresponding - yielded value. - - Example: - - ```mlir - %0 = ... - %1 = project %0 : tuple -> tuple { - ^bb0(%arg : tuple): - %true = literal -1 : si1 - %42 = literal 42 : si32 - yield %true, %42 : si1, si32 - } - ``` - }]; - let arguments = (ins Substrait_Relation:$input); - let regions = (region AnyRegion:$expressions); - let results = (outs Substrait_Relation:$result); - // TODO(ingomueller): We could elide/shorten the block argument from the - // assembly by writing custom printers/parsers similar to - // `scf.for` etc. 
- let assemblyFormat = [{ - $input attr-dict `:` type($input) `->` type($result) $expressions - }]; - let hasRegionVerifier = 1; - let hasFolder = 1; - let extraClassDefinition = [{ - /// Implement OpAsmOpInterface. - ::llvm::StringRef $cppClass::getDefaultDialect() { - return SubstraitDialect::getDialectNamespace(); - } - }]; -} - -#endif // SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITOPS diff --git a/include/structured/Dialect/Substrait/IR/SubstraitTypes.td b/include/structured/Dialect/Substrait/IR/SubstraitTypes.td deleted file mode 100644 index 4c684b3ce579..000000000000 --- a/include/structured/Dialect/Substrait/IR/SubstraitTypes.td +++ /dev/null @@ -1,60 +0,0 @@ -//===-- SubstraitTypes.td - Substrait dialect types --------*- tablegen -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITTYPES -#define SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITTYPES - -include "structured/Dialect/Substrait/IR/SubstraitDialect.td" -include "mlir/IR/CommonTypeConstraints.td" -include "mlir/IR/OpBase.td" - -// Base class for Substrait dialect types. -class Substrait_Type traits = []> - : TypeDef { - let mnemonic = typeMnemonic; -} - -/// Currently supported atomic types. These correspond directly to the types in -/// https://github.com/substrait-io/substrait/blob/main/proto/substrait/type.proto. -// TODO(ingomueller): Add the other low-hanging fruits here. -def Substrait_AtomicTypes { - list types = [ - SI1, // Boolean - SI32 // I32 - ]; -} - -/// Attributes of currently supported atomic types. -def Substrait_AtomicAttributes { - list attrs = [ - SI1Attr, // Boolean - SI32Attr // I32 - ]; -} - -/// Attribute of one of the currently supported atomic types. 
-def Substrait_AtomicAttribute : AnyAttrOf; - -/// One of the currently supported atomic types. -def Substrait_AtomicType : AnyTypeOf; - -/// Any container type, i.e., structs, maps, lists, and nestings thereof. -def Substrait_ContainerType : NestedTupleOf; - -/// One of the currently supported atomic or nested types. -def Substrait_FieldType : AnyTypeOf<[ - Substrait_AtomicType, - Substrait_ContainerType -]>; - -/// Placeholder for a proper relation type, the result of any `RelOpInterface` -/// op. -// TODO(ingomueller): Transform this into a proper relation type. -def Substrait_Relation : NestedTupleOf; - -#endif // SUBSTRAIT_DIALECT_SUBSTRAIT_IR_SUBSTRAITTYPES diff --git a/include/structured/Dialect/Substrait/Transforms/CMakeLists.txt b/include/structured/Dialect/Substrait/Transforms/CMakeLists.txt deleted file mode 100644 index b71dd583fb5a..000000000000 --- a/include/structured/Dialect/Substrait/Transforms/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls -name Substrait) -add_public_tablegen_target(MLIRSubstraitTransformsIncGen) diff --git a/include/structured/Dialect/Substrait/Transforms/Passes.h b/include/structured/Dialect/Substrait/Transforms/Passes.h deleted file mode 100644 index 02f640088c7f..000000000000 --- a/include/structured/Dialect/Substrait/Transforms/Passes.h +++ /dev/null @@ -1,33 +0,0 @@ -//===- Passes.h - Substrait pass declarations -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef STRUCTURED_DIALECT_SUBSTRAIT_TRANSFORMS_PASSES_H_ -#define STRUCTURED_DIALECT_SUBSTRAIT_TRANSFORMS_PASSES_H_ - -#include "mlir/Pass/Pass.h" - -namespace mlir { -namespace substrait { - -#define GEN_PASS_DECL -#include "structured/Dialect/Substrait/Transforms/Passes.h.inc" - -/// Create a pass to eliminate duplicate fields in `emit` ops. -std::unique_ptr createEmitDeduplicationPass(); - -/// Add patterns that eliminate duplicate fields in `emit` ops. -void populateEmitDeduplicationPatterns(RewritePatternSet &patterns); - -/// Generate the code for registering passes. -#define GEN_PASS_REGISTRATION -#include "structured/Dialect/Substrait/Transforms/Passes.h.inc" - -} // namespace substrait -} // namespace mlir - -#endif // STRUCTURED_DIALECT_SUBSTRAIT_TRANSFORMS_PASSES_H_ diff --git a/include/structured/Dialect/Substrait/Transforms/Passes.td b/include/structured/Dialect/Substrait/Transforms/Passes.td deleted file mode 100644 index 9851331c75bd..000000000000 --- a/include/structured/Dialect/Substrait/Transforms/Passes.td +++ /dev/null @@ -1,55 +0,0 @@ -//===-- Passes.td - Substrait pass definition file ---------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef STRUCTURED_DIALECT_SUBSTRAIT_TRANSFORMS_PASSES -#define STRUCTURED_DIALECT_SUBSTRAIT_TRANSFORMS_PASSES - -include "mlir/Pass/PassBase.td" - -def SubstraitEmitDeduplicationPass - : Pass<"substrait-emit-deduplication"> { - let summary = "Remove duplicate emit fields."; - let description = [{ - Removes duplicates in the mapping of `emit` ops. 
This is somewhat similar to - CSE in that it fuses redundant values; however, the redudant values are - fields in the tuples/rows/structs inside of a `Relation` rather than SSA - values. The deduplication consist of a pattern for each `RelOp` that removes - the duplicate fields in a preceeding `emit` op and then re-establishes the - original sequence of fields with a subsequent `emit` op (which may be fused - with other emit ops and/or enable further deduplication). - - Example: - - ```mlir - %0 = ... - %1 = ... - %2 = emit [0, 0] from %0 : tuple -> tuple - %3 = cross %1 x %2 : tuple x tuple - yield $3 : tuple - ``` - - Here, the `emit` op introduces a duplicate field by emitting the field `0` - twice, so subsequent `RelOp`s have larger inputs than necessary. The pass - pushes the duplication through the subsequent op, `cross`, like this: - - ```mlir - %0 = ... - %1 = ... - %2 = emit [0] from %0 : tuple -> tuple - %3 = cross %1 x %2 : tuple x tuple - %4 = emit [0, 1, 1] from %0 : tuple -> tuple - yield $4 : tuple - ``` - - The final `emit` cannot be pushed further as the encompassing `relation` - needs to keep the fields indicated by the `yield` op. - }]; - let constructor = "::mlir::substrait::createEmitDeduplicationPass()"; -} - -#endif // STRUCTURED_DIALECT_SUBSTRAIT_TRANSFORMS_PASSES diff --git a/include/structured/Target/SubstraitPB/Export.h b/include/structured/Target/SubstraitPB/Export.h deleted file mode 100644 index 8186e752b733..000000000000 --- a/include/structured/Target/SubstraitPB/Export.h +++ /dev/null @@ -1,28 +0,0 @@ -//===-- Export.h - Export Substrait dialect to protobuf ---------*- C++ -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef STRUCTURED_TARGET_SUBSTRAITPB_EXPORT_H -#define STRUCTURED_TARGET_SUBSTRAITPB_EXPORT_H - -#include "structured/Target/SubstraitPB/Options.h" -#include "llvm/Support/raw_ostream.h" - -namespace mlir { -class Operation; -class LogicalResult; - -namespace substrait { - -LogicalResult -translateSubstraitToProtobuf(Operation *op, llvm::raw_ostream &output, - substrait::ImportExportOptions options = {}); - -} // namespace substrait -} // namespace mlir - -#endif // STRUCTURED_TARGET_SUBSTRAITPB_EXPORT_H diff --git a/include/structured/Target/SubstraitPB/Import.h b/include/structured/Target/SubstraitPB/Import.h deleted file mode 100644 index 39bb4d0fa33b..000000000000 --- a/include/structured/Target/SubstraitPB/Import.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- Import.h - Import protobuf to Substrait dialect ---------*- C++ -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef STRUCTURED_TARGET_SUBSTRAITPB_IMPORT_H -#define STRUCTURED_TARGET_SUBSTRAITPB_IMPORT_H - -#include "structured/Target/SubstraitPB/Options.h" -#include "llvm/ADT/StringRef.h" - -namespace mlir { - -class MLIRContext; -class ModuleOp; -template -class OwningOpRef; - -namespace substrait { - -OwningOpRef -translateProtobufToSubstrait(llvm::StringRef input, MLIRContext *context, - substrait::ImportExportOptions options = {}); - -} // namespace substrait -} // namespace mlir - -#endif // STRUCTURED_TARGET_SUBSTRAITPB_IMPORT_H diff --git a/include/structured/Target/SubstraitPB/Options.h b/include/structured/Target/SubstraitPB/Options.h deleted file mode 100644 index 1267ec242d49..000000000000 --- a/include/structured/Target/SubstraitPB/Options.h +++ /dev/null @@ -1,30 +0,0 @@ -//===-- Options.h - Options for import/and export of Substrait --*- C++ -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef STRUCTURED_TARGET_SUBSTRAITPB_OPTIONS_H -#define STRUCTURED_TARGET_SUBSTRAITPB_OPTIONS_H - -#include "llvm/ADT/StringRef.h" - -namespace mlir { -namespace substrait { - -/// Serialization formats for serialization and deserialization to and from -/// protobuf messages. -enum class SerdeFormat { kText, kBinary, kJson, kPrettyJson }; - -struct ImportExportOptions { - /// Specifies which serialization formats is used for serialization and - /// deserialization to and from protobuf messages. 
- SerdeFormat serdeFormat; -}; - -} // namespace substrait -} // namespace mlir - -#endif // STRUCTURED_TARGET_SUBSTRAITPB_OPTIONS_H diff --git a/lib/CAPI/CMakeLists.txt b/lib/CAPI/CMakeLists.txt index 7a97f3f6d6e0..813a0d37a3c1 100644 --- a/lib/CAPI/CMakeLists.txt +++ b/lib/CAPI/CMakeLists.txt @@ -8,10 +8,8 @@ add_mlir_public_c_api_library(StructuredCAPI MLIRIterators MLIRIteratorsToLLVM MLIRIteratorsTransforms - MLIRSubstraitDialect MLIRTabular MLIRTabularToLLVM - MLIRTargetSubstraitPB MLIRTupleDialect MLIRTupleTransforms MLIRPass diff --git a/lib/CAPI/Dialects.cpp b/lib/CAPI/Dialects.cpp index e4a823ba20e8..dc49b0f0b956 100644 --- a/lib/CAPI/Dialects.cpp +++ b/lib/CAPI/Dialects.cpp @@ -15,16 +15,11 @@ #include "mlir/CAPI/Support.h" #include "mlir/IR/Types.h" #include "structured/Dialect/Iterators/IR/Iterators.h" -#include "structured/Dialect/Substrait/IR/Substrait.h" #include "structured/Dialect/Tabular/IR/Tabular.h" #include "structured/Dialect/Tuple/IR/Tuple.h" -#include "structured/Target/SubstraitPB/Export.h" -#include "structured/Target/SubstraitPB/Import.h" -#include "structured/Target/SubstraitPB/Options.h" using namespace mlir; using namespace mlir::iterators; -using namespace mlir::substrait; using namespace mlir::tabular; using namespace mlir::tuple; @@ -42,51 +37,6 @@ MlirType mlirIteratorsStreamTypeGet(MlirContext context, MlirType elementType) { return wrap(StreamType::get(unwrap(context), unwrap(elementType))); } -//===----------------------------------------------------------------------===// -// Substrait dialect -//===----------------------------------------------------------------------===// - -MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(Substrait, substrait, SubstraitDialect) - -/// Converts the provided enum value into the equivalent value from -/// `::mlir::substrait::SerdeFormat`. 
-SerdeFormat convertSerdeFormat(MlirSubstraitSerdeFormat format) { - switch (format) { - case MlirSubstraitBinarySerdeFormat: - return SerdeFormat::kBinary; - case MlirSubstraitTextSerdeFormat: - return SerdeFormat::kText; - case MlirSubstraitJsonSerdeFormat: - return SerdeFormat::kJson; - case MlirSubstraitPrettyJsonSerdeFormat: - return SerdeFormat::kPrettyJson; - } -} - -MlirModule mlirSubstraitImportPlan(MlirContext context, MlirStringRef input, - MlirSubstraitSerdeFormat format) { - ImportExportOptions options; - options.serdeFormat = convertSerdeFormat(format); - OwningOpRef owning = - translateProtobufToSubstrait(unwrap(input), unwrap(context), options); - if (!owning) - return MlirModule{nullptr}; - return MlirModule{owning.release().getOperation()}; -} - -MlirAttribute mlirSubstraitExportPlan(MlirOperation op, - MlirSubstraitSerdeFormat format) { - std::string str; - llvm::raw_string_ostream stream(str); - ImportExportOptions options; - options.serdeFormat = convertSerdeFormat(format); - if (failed(translateSubstraitToProtobuf(unwrap(op), stream, options))) - return wrap(Attribute()); - MLIRContext *context = unwrap(op)->getContext(); - Attribute attr = StringAttr::get(context, str); - return wrap(attr); -} - //===----------------------------------------------------------------------===// // Tabular dialect and types //===----------------------------------------------------------------------===// diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index b5b782e3c2fa..fdc1d6eed337 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -1,5 +1,4 @@ add_subdirectory(CAPI) add_subdirectory(Conversion) add_subdirectory(Dialect) -add_subdirectory(Target) add_subdirectory(Utils) diff --git a/lib/Dialect/CMakeLists.txt b/lib/Dialect/CMakeLists.txt index 3550d02e8d7e..bc38b7c2b7bc 100644 --- a/lib/Dialect/CMakeLists.txt +++ b/lib/Dialect/CMakeLists.txt @@ -1,4 +1,3 @@ add_subdirectory(Iterators) -add_subdirectory(Substrait) add_subdirectory(Tabular) 
add_subdirectory(Tuple) diff --git a/lib/Dialect/Substrait/CMakeLists.txt b/lib/Dialect/Substrait/CMakeLists.txt deleted file mode 100644 index 9f57627c321f..000000000000 --- a/lib/Dialect/Substrait/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_subdirectory(IR) -add_subdirectory(Transforms) diff --git a/lib/Dialect/Substrait/IR/CMakeLists.txt b/lib/Dialect/Substrait/IR/CMakeLists.txt deleted file mode 100644 index af07ab40c4af..000000000000 --- a/lib/Dialect/Substrait/IR/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_mlir_dialect_library(MLIRSubstraitDialect - Substrait.cpp - - LINK_LIBS PUBLIC - MLIRInferTypeOpInterface - MLIRIR - - DEPENDS - MLIRSubstraitOpsIncGen -) diff --git a/lib/Dialect/Substrait/IR/Substrait.cpp b/lib/Dialect/Substrait/IR/Substrait.cpp deleted file mode 100644 index b553bc3210cb..000000000000 --- a/lib/Dialect/Substrait/IR/Substrait.cpp +++ /dev/null @@ -1,407 +0,0 @@ -//===-- Substrait.cpp - Substrait dialect -----------------------*- C++ -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "structured/Dialect/Substrait/IR/Substrait.h" - -#include "mlir/Dialect/LLVMIR/LLVMTypes.h" -#include "mlir/IR/DialectImplementation.h" -#include "mlir/Support/LogicalResult.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/TypeSwitch.h" - -using namespace mlir; -using namespace mlir::substrait; - -//===----------------------------------------------------------------------===// -// Substrait dialect -//===----------------------------------------------------------------------===// - -#include "structured/Dialect/Substrait/IR/SubstraitOpsDialect.cpp.inc" - -void SubstraitDialect::initialize() { -#define GET_OP_LIST - addOperations< -#include "structured/Dialect/Substrait/IR/SubstraitOps.cpp.inc" - >(); - addTypes< -#define GET_TYPEDEF_LIST -#include "structured/Dialect/Substrait/IR/SubstraitOpsTypes.cpp.inc" - >(); -} - -//===----------------------------------------------------------------------===// -// Substrait interfaces -//===----------------------------------------------------------------------===// - -#include "structured/Dialect/Substrait/IR/SubstraitOpInterfaces.cpp.inc" -#include "structured/Dialect/Substrait/IR/SubstraitTypeInterfaces.cpp.inc" - -//===----------------------------------------------------------------------===// -// Substrait operations -//===----------------------------------------------------------------------===// - -#define GET_OP_CLASSES -#include "structured/Dialect/Substrait/IR/SubstraitOps.cpp.inc" - -namespace mlir { -namespace substrait { - -/// Implement `SymbolOpInterface`. 
-::mlir::LogicalResult -CallOp::verifySymbolUses(SymbolTableCollection &symbolTables) { - if (!symbolTables.lookupNearestSymbolFrom( - *this, getCalleeAttr())) - return emitOpError() << "refers to " << getCalleeAttr() - << ", which is not a valid 'extension_function' op"; - return success(); -} - -LogicalResult -CrossOp::inferReturnTypes(MLIRContext *context, std::optional loc, - ValueRange operands, DictionaryAttr attributes, - OpaqueProperties properties, RegionRange regions, - llvm::SmallVectorImpl &inferredReturnTypes) { - Value leftInput = operands[0]; - Value rightInput = operands[1]; - - TypeRange leftFieldTypes = cast(leftInput.getType()).getTypes(); - TypeRange rightFieldTypes = cast(rightInput.getType()).getTypes(); - - SmallVector fieldTypes; - llvm::append_range(fieldTypes, leftFieldTypes); - llvm::append_range(fieldTypes, rightFieldTypes); - auto resultType = TupleType::get(context, fieldTypes); - - inferredReturnTypes = SmallVector{resultType}; - - return success(); -} - -OpFoldResult EmitOp::fold(FoldAdaptor adaptor) { - MLIRContext *context = getContext(); - Type i64 = IntegerType::get(context, 64); - - // If the input is also an `emit`, fold it into this op. - if (auto previousEmit = dyn_cast(getInput().getDefiningOp())) { - // Compute new mapping. - ArrayAttr previousMapping = previousEmit.getMapping(); - SmallVector newMapping; - newMapping.reserve(getMapping().size()); - for (auto attr : getMapping().getAsRange()) { - int64_t index = attr.getInt(); - int64_t newIndex = cast(previousMapping[index]).getInt(); - newMapping.push_back(IntegerAttr::get(i64, newIndex)); - } - - // Update this op. - setMappingAttr(ArrayAttr::get(context, newMapping)); - setOperand(previousEmit.getInput()); - return getResult(); - } - - // Remainder: fold away if the mapping is the identity mapping. - - // Return if the mapping is not the identity mapping. 
- int64_t numFields = cast(getInput().getType()).size(); - int64_t numIndices = getMapping().size(); - if (numFields != numIndices) - return {}; - for (int64_t i = 0; i < numIndices; ++i) { - auto attr = getMapping()[i]; - int64_t index = cast(attr).getInt(); - if (index != i) - return {}; - } - - // The `emit` op *has* an identity mapping, so it does not have any effect. - // Return its input instead. - return getInput(); -} - -LogicalResult -EmitOp::inferReturnTypes(MLIRContext *context, std::optional loc, - ValueRange operands, DictionaryAttr attributes, - OpaqueProperties properties, RegionRange regions, - llvm::SmallVectorImpl &inferredReturnTypes) { - auto *typedProperties = properties.as(); - if (!loc) - loc = UnknownLoc::get(context); - - ArrayAttr mapping = typedProperties->getMapping(); - Type inputType = operands[0].getType(); - ArrayRef inputTypes = inputType.cast().getTypes(); - - // Map input types to output types. - SmallVector outputTypes; - outputTypes.reserve(mapping.size()); - for (auto indexAttr : mapping.getAsRange()) { - int64_t index = indexAttr.getInt(); - if (index < 0 || index >= static_cast(inputTypes.size())) - return ::emitError(loc.value()) - << index << " is not a valid index into " << inputType; - Type mappedType = inputTypes[index]; - outputTypes.push_back(mappedType); - } - - // Create final tuple type. - auto outputType = TupleType::get(context, outputTypes); - inferredReturnTypes.push_back(outputType); - - return success(); -} - -/// Computes the type of the nested field of the given `type` identified by -/// `position`. Each entry `n` in the given index array `position` corresponds -/// to the `n`-th entry in that level. The function is thus implemented -/// recursively, where each recursion level extracts the type of the outer-most -/// level identified by the first index in the `position` array. 
-static FailureOr computeTypeAtPosition(Location loc, Type type, - ArrayRef position) { - if (position.empty()) - return type; - - // Recurse into tuple field of first index in position array. - if (auto tupleType = llvm::dyn_cast(type)) { - int64_t index = position[0]; - ArrayRef fieldTypes = tupleType.getTypes(); - if (index >= static_cast(fieldTypes.size()) || index < 0) - return emitError(loc) << index << " is not a valid index for " << type; - - return computeTypeAtPosition(loc, fieldTypes[index], position.drop_front()); - } - - return emitError(loc) << "can't extract element from type " << type; -} - -LogicalResult FieldReferenceOp::inferReturnTypes( - MLIRContext *context, std::optional loc, ValueRange operands, - DictionaryAttr attributes, OpaqueProperties properties, RegionRange regions, - llvm::SmallVectorImpl &inferredReturnTypes) { - auto *typedProperties = properties.as(); - if (!loc) - loc = UnknownLoc::get(context); - - // Extract field type at given position. - DenseI64ArrayAttr position = typedProperties->getPosition(); - Type inputType = operands[0].getType(); - FailureOr fieldType = - computeTypeAtPosition(loc.value(), inputType, position); - if (failed(fieldType)) - return ::emitError(loc.value()) - << "mismatching position and type (position: " << position - << ", type: " << inputType << ")"; - - inferredReturnTypes.push_back(fieldType.value()); - - return success(); -} - -LogicalResult FilterOp::verifyRegions() { - MLIRContext *context = getContext(); - Type si1 = IntegerType::get(context, /*width=*/1, IntegerType::Signed); - Region &condition = getCondition(); - - // Verify that type of yielded value is Boolean. 
- auto yieldOp = llvm::cast(condition.front().getTerminator()); - if (yieldOp.getValue().size() != 1) - return emitOpError() - << "must have 'condition' region yielding one value (yields " - << yieldOp.getValue().size() << ")"; - - Type yieldedType = yieldOp.getValue().getTypes()[0]; - if (yieldedType != si1) - return emitOpError() - << "must have 'condition' region yielding 'si1' (yields " - << yieldedType << ")"; - - // Verify that block has argument of input tuple type. - Type tupleType = getResult().getType(); - if (condition.getNumArguments() != 1 || - condition.getArgument(0).getType() != tupleType) { - InFlightDiagnostic diag = emitOpError() - << "must have 'condition' region taking " - << tupleType << " as argument (takes "; - if (condition.getNumArguments() == 0) - diag << "no arguments)"; - else - diag << condition.getArgument(0).getType() << ")"; - return diag; - } - - return success(); -} - -LogicalResult -LiteralOp::inferReturnTypes(MLIRContext *context, std::optional loc, - ValueRange operands, DictionaryAttr attributes, - OpaqueProperties properties, RegionRange regions, - llvm::SmallVectorImpl &inferredReturnTypes) { - auto *typedProperties = properties.as(); - - auto attr = llvm::dyn_cast(typedProperties->getValue()); - if (!attr) - return emitOptionalError(loc, "unsuited attribute for literal value: ", - typedProperties->getValue()); - - Type resultType = attr.getType(); - inferredReturnTypes.emplace_back(resultType); - - return success(); -} - -/// Verifies that the provided field names match the provided field types. While -/// the field types are potentially nested, the names are given in a single, -/// flat list and correspond to the field types in depth first order (where each -/// nested tuple-typed field has a name and its nested field have names on their -/// own). Furthermore, the names on each nesting level need to be unique. For -/// details, see -/// https://substrait.io/tutorial/sql_to_substrait/#types-and-schemas. 
-static FailureOr -verifyNamedStructHelper(Location loc, llvm::ArrayRef fieldNames, - TypeRange fieldTypes) { - int numConsumedNames = 0; - llvm::SmallSet currentLevelNames; - for (Type type : fieldTypes) { - // Check name of current field. - if (numConsumedNames >= static_cast(fieldNames.size())) - return emitError(loc, "not enough field names provided"); - auto currentName = llvm::cast(fieldNames[numConsumedNames]); - if (!currentLevelNames.insert(currentName).second) - return emitError(loc, llvm::Twine("duplicate field name: '") + - currentName.getValue() + "'"); - numConsumedNames++; - - // Recurse for nested structs/tuples. - if (auto tupleType = llvm::dyn_cast(type)) { - llvm::ArrayRef nestedFieldTypes = tupleType.getTypes(); - llvm::ArrayRef remainingNames = - fieldNames.drop_front(numConsumedNames); - FailureOr res = - verifyNamedStructHelper(loc, remainingNames, nestedFieldTypes); - if (failed(res)) - return failure(); - numConsumedNames += res.value(); - } - } - return numConsumedNames; -} - -static LogicalResult verifyNamedStruct(Operation *op, - llvm::ArrayRef fieldNames, - TupleType tupleType) { - Location loc = op->getLoc(); - TypeRange fieldTypes = tupleType.getTypes(); - - // Emits error message with context on failure. - auto emitErrorMessage = [&]() { - InFlightDiagnostic error = op->emitOpError() - << "has mismatching 'field_names' (["; - llvm::interleaveComma(fieldNames, error); - error << "]) and result type (" << tupleType << ")"; - return error; - }; - - // Call recursive verification function. - FailureOr numConsumedNames = - verifyNamedStructHelper(loc, fieldNames, fieldTypes); - - // Relay any failure. - if (failed(numConsumedNames)) - return emitErrorMessage(); - - // If we haven't consumed all names, we got too many of them, so report. 
- if (numConsumedNames.value() != static_cast(fieldNames.size())) { - InFlightDiagnostic error = emitErrorMessage(); - error.attachNote(loc) << "too many field names provided"; - return error; - } - - return success(); -} - -LogicalResult NamedTableOp::verify() { - llvm::ArrayRef fieldNames = getFieldNames().getValue(); - auto tupleType = llvm::cast(getResult().getType()); - return verifyNamedStruct(getOperation(), fieldNames, tupleType); -} - -LogicalResult PlanRelOp::verifyRegions() { - // Verify that we `yield` exactly one value. - auto yieldOp = llvm::cast(getBody().front().getTerminator()); - if (yieldOp.getValue().size() != 1) - return emitOpError() - << "must have 'body' region yielding one value (yields " - << yieldOp.getValue().size() << ")"; - - // Verify that the field names match the field types. If we don't have any, - // we're done. - if (!getFieldNames().has_value()) - return success(); - - // Otherwise, use helper to verify. - llvm::ArrayRef fieldNames = getFieldNames()->getValue(); - auto tupleType = llvm::cast(yieldOp.getValue().getTypes()[0]); - return verifyNamedStruct(getOperation(), fieldNames, tupleType); -} - -OpFoldResult ProjectOp::fold(FoldAdaptor adaptor) { - Operation *terminator = adaptor.getExpressions().front().getTerminator(); - - // If the region does not yield any values, the the `project` has no effect. - if (terminator->getNumOperands() == 0) { - return getInput(); - } - - return {}; -} - -LogicalResult ProjectOp::verifyRegions() { - // Verify that the expression block has a matching argument type. - auto inputTupleType = llvm::cast(getInput().getType()); - auto blockArgTypes = getExpressions().front().getArgumentTypes(); - if (blockArgTypes != ArrayRef(inputTupleType)) - return emitOpError() - << "has 'expressions' region with mismatching argument type" - << " (has: " << blockArgTypes << ", expected: " << inputTupleType - << ")"; - - // Verify that the input field types are a prefix of the output field types. 
- size_t numInputFields = inputTupleType.getTypes().size(); - auto outputTupleType = llvm::cast(getResult().getType()); - ArrayRef outputPrefixTypes = - outputTupleType.getTypes().take_front(numInputFields); - - if (inputTupleType.getTypes() != outputPrefixTypes) - return emitOpError() - << "has output field type whose prefix is different from " - << "input field types (" << inputTupleType.getTypes() << " vs " - << outputPrefixTypes << ")"; - - // Verify that yielded operands have the same types as the new output fields. - ArrayRef newFieldTypes = - outputTupleType.getTypes().drop_front(numInputFields); - auto yieldOp = llvm::cast(getExpressions().front().getTerminator()); - - if (yieldOp.getOperandTypes() != newFieldTypes) - return emitOpError() - << "has output field type whose new fields are different from " - << "the yielded operand types (" << newFieldTypes << " vs " - << yieldOp.getOperandTypes() << ")"; - - return success(); -} - -} // namespace substrait -} // namespace mlir - -//===----------------------------------------------------------------------===// -// Substrait types -//===----------------------------------------------------------------------===// - -#define GET_TYPEDEF_CLASSES -#include "structured/Dialect/Substrait/IR/SubstraitOpsTypes.cpp.inc" diff --git a/lib/Dialect/Substrait/Transforms/CMakeLists.txt b/lib/Dialect/Substrait/Transforms/CMakeLists.txt deleted file mode 100644 index ba4eef49b48c..000000000000 --- a/lib/Dialect/Substrait/Transforms/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -add_mlir_dialect_library(MLIRSubstraitTransforms - EmitDeduplication.cpp - - DEPENDS - MLIRSubstraitTransformsIncGen - - LINK_LIBS PUBLIC - MLIRIR - MLIRPass - MLIRRewrite - MLIRSubstraitDialect - MLIRTransforms - MLIRTransformUtils -) diff --git a/lib/Dialect/Substrait/Transforms/EmitDeduplication.cpp b/lib/Dialect/Substrait/Transforms/EmitDeduplication.cpp deleted file mode 100644 index 2b6e7281efc9..000000000000 --- 
a/lib/Dialect/Substrait/Transforms/EmitDeduplication.cpp +++ /dev/null @@ -1,512 +0,0 @@ -//===- EmitDeduplication.cpp - Impl. of emit deduplication ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "structured/Dialect/Substrait/Transforms/Passes.h" - -#include "mlir/IR/Dominance.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Transforms/CSE.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "structured/Dialect/Substrait/IR/Substrait.h" - -namespace mlir::substrait { -#define GEN_PASS_DEF_SUBSTRAITEMITDEDUPLICATIONPASS -#include "structured/Dialect/Substrait/Transforms/Passes.h.inc" -} // namespace mlir::substrait - -using namespace llvm; -using namespace mlir; -using namespace mlir::substrait; - -namespace { - -struct SubstraitEmitDeduplicationPass - : substrait::impl::SubstraitEmitDeduplicationPassBase< - SubstraitEmitDeduplicationPass> { - using substrait::impl::SubstraitEmitDeduplicationPassBase< - SubstraitEmitDeduplicationPass>::SubstraitEmitDeduplicationPassBase; - - void runOnOperation() override; -}; - -void SubstraitEmitDeduplicationPass::runOnOperation() { - mlir::RewritePatternSet patterns(&getContext()); - populateEmitDeduplicationPatterns(patterns); - if (failed(mlir::applyPatternsAndFoldGreedily(getOperation(), - std::move(patterns)))) { - Location loc = getOperation()->getLoc(); - emitError(loc) << "emit deduplication: pattern application failed"; - signalPassFailure(); - } -} - -/// If the given `input` was produced by an `emit` op with duplicates, creates a -/// new `emit` op without duplicates and returns the result of the new `emit`. 
-/// Otherwise, i.e., if the `input` was not produced by an `emit` op or that op -/// did not have duplicates, returns the original `input`. In both cases, also -/// populates `reverseMapping` with the mapping that re-establishes the original -/// order of the fields from the deduplicated order and returns the number of -/// fields after deduplication and whether the `input` was deduplicated. -std::tuple -createDeduplicatingEmit(Value input, SmallVector &reverseMapping, - PatternRewriter &rewriter) { - // Handles the bases cases where the input either has no `emit` op or an - // `emit` op with no duplicates. In that case, the returned value is just the - // `input` and the reverse mapping is just the identity. - auto handleNoDuplicates = [&]() { - int64_t numInputFields = cast(input.getType()).getTypes().size(); - for (auto i : seq(numInputFields)) - reverseMapping.push_back(i); - return std::tuple{input, numInputFields, false}; - }; - - // Input is not an 'emit' op: handle base case. - auto emitOp = llvm::dyn_cast_if_present(input.getDefiningOp()); - if (!emitOp) - return handleNoDuplicates(); - - // Compute the new mapping without duplicates as well as, for each position in - // the old mapping, the position in the new mapping. - ArrayAttr oldInputMapping = emitOp.getMapping(); - SmallVector newInputMapping; - SmallVector oldToNewInputMapping; - { - llvm::DenseMap indexPositions; - oldToNewInputMapping.reserve(oldInputMapping.size()); - for (auto [i, attr] : enumerate(oldInputMapping)) { - int64_t index = cast(attr).getInt(); - auto [it, success] = indexPositions.try_emplace(index, i); - if (success) - newInputMapping.push_back(index); - oldToNewInputMapping.push_back(it->second); - } - } - - // If the new and old input mappings have the same size, then there are no - // duplicates, so we handle it as a base case. 
- if (newInputMapping.size() == oldInputMapping.size()) - return handleNoDuplicates(); - - // Compute the mapping that re-establishes the original emit order. - reverseMapping.reserve(reverseMapping.size() + newInputMapping.size()); - { - // Compute the reverse mapping of the input. - SmallVector reverseInputMapping(oldInputMapping.size()); - for (auto [i, index] : enumerate(newInputMapping)) - reverseInputMapping[index] = i; - - // The first fields of the reverse mapping reverse the effect of the - // deduplication of the emit op on the input. - for (auto [i, attr] : - enumerate(oldInputMapping.getAsRange())) { - int64_t reverseIndex = reverseInputMapping[attr.getInt()]; - reverseMapping.push_back(reverseIndex); - } - } - - // If we did have duplicates, add an `emit` op that deduplicates the input. - Location loc = emitOp.getLoc(); - ArrayAttr newInputMappingAttr = rewriter.getI64ArrayAttr(newInputMapping); - auto newEmitOp = - rewriter.create(loc, emitOp.getInput(), newInputMappingAttr); - - return {newEmitOp, newInputMapping.size(), true}; -} - -/// Deduplicates the fields of the region with a single `Tuple` argument using -/// the provided (deduplicating) mapping. This involves changing the type of the -/// region argument to the provided `newElementType`, which must be the type -/// obtained by applying deduplication to the argument type of the provided -/// `region`, as well as changing all `field_reference` ops using the region -/// argument to work on the deduplicated type. -// TODO(ingomueller): We could add an overload for this function that computes -// `newElementType` from the type of the region argument and the mapping. 
-void deduplicateRegionArgs(Region ®ion, ArrayAttr newMapping, - Type newElementType, PatternRewriter &rewriter) { - assert(region.getNumArguments() == 1 && - "only regions with 1 argument are supported"); - auto oldElementType = cast(region.getArgument(0).getType()); - int64_t numOldFields = oldElementType.getTypes().size(); - - // For each position in the original input type, compute which position it - // corresponds to in the deduplicated input. This is required for replacing - // field references to the original type with references to the deduplicated - // type. - SmallVector oldToNewPositions; - oldToNewPositions.reserve(numOldFields); - { - llvm::DenseMap indexPositions; - for (auto attr : newMapping.getAsRange()) { - int64_t index = attr.getInt(); - int64_t pos = indexPositions.size(); - auto [it, success] = indexPositions.try_emplace(index, pos); - oldToNewPositions.push_back(it->second); - } - } - - // Update field references using the region argument. - for (Operation *user : region.getArgument(0).getUsers()) { - // We are only interested in `field_reference` ops. - if (!isa(user)) - continue; - auto refOp = cast(user); - - // Compute new position array from the old one. - ArrayRef oldPositions = refOp.getPosition(); - SmallVector newPositions; - newPositions.reserve(oldPositions.size()); - for (auto index : oldPositions) - newPositions.push_back(index); - newPositions[0] = oldToNewPositions[newPositions[0]]; - - // Update op in place. - refOp.setPosition(newPositions); - } - - // Update argument type of the region. 
- region.getArgument(0).setType(newElementType); -} - -struct EliminateDuplicateYieldsInProjectPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(ProjectOp op, - PatternRewriter &rewriter) const override { - MLIRContext *context = op.getContext(); - Operation *terminator = op.getExpressions().front().getTerminator(); - int64_t numOriginalYields = terminator->getNumOperands(); - auto inputTupleType = cast(op.getInput().getType()); - - // Determine duplicate values in `yield` and remember the first ocurrence of - // each value. - llvm::DenseMap valuePositions; - for (Value value : terminator->getOperands()) - valuePositions.try_emplace(value, valuePositions.size()); - - if (valuePositions.size() == numOriginalYields) - return rewriter.notifyMatchFailure(op, "does not yield duplicate values"); - - // Create a mapping from the de-duplicated values that re-establishes the - // original emit order. The input fields are just forwarded, so create - // identity prefix. - SmallVector reverseMapping; - reverseMapping.reserve(inputTupleType.size() + numOriginalYields); - append_range(reverseMapping, iota_range(0, inputTupleType.size(), - /*Inclusive=*/false)); - - // Reverse mapping: The fields added by the `expression` regions are now - // de-duplicated, so we need to reverse the effect of the deduplication, - // taking the prefix into account. - for (Value value : terminator->getOperands()) { - int64_t pos = valuePositions[value]; - reverseMapping.push_back(inputTupleType.size() + pos); - } - - // Remove duplicate values in `yield` op of the `expressions` region. - { - SmallVector values; - values.reserve(valuePositions.size()); - for (auto [value, pos] : valuePositions) - values.push_back(value); - - PatternRewriter::InsertionGuard guard(rewriter); - rewriter.setInsertionPointAfter(terminator); - terminator = rewriter.replaceOpWithNewOp(terminator, values); - } - - // Compute deduplicated output field types. 
- SmallVector outputTypes; - int64_t numNewYields = terminator->getNumOperands(); - outputTypes.reserve(inputTupleType.size() + numNewYields); - append_range(outputTypes, inputTupleType.getTypes()); - append_range(outputTypes, terminator->getOperandTypes()); - auto newOutputType = TupleType::get(context, outputTypes); - - // Create new `project` op with updated region and output type. - auto newOp = - rewriter.create(op.getLoc(), newOutputType, op.getInput()); - rewriter.inlineRegionBefore(op.getExpressions(), newOp.getExpressions(), - newOp.getExpressions().end()); - - // Create `emit` op with the reverse mapping. - ArrayAttr reverseMappingAttr = rewriter.getI64ArrayAttr(reverseMapping); - rewriter.replaceOpWithNewOp(op, newOp, reverseMappingAttr); - - return success(); - } -}; - -struct EliminateIdentityYieldsInProjectPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(ProjectOp op, - PatternRewriter &rewriter) const override { - MLIRContext *context = op.getContext(); - Operation *terminator = op.getExpressions().front().getTerminator(); - auto inputTupleType = cast(op.getInput().getType()); - auto resultTupleType = cast(op.getResult().getType()); - int64_t numInputFields = inputTupleType.size(); - - // Look for yielded values that are just forwarding input fields. - SmallVector newYields; - SmallVector mapping; - mapping.reserve(resultTupleType.size()); - append_range(mapping, seq(numInputFields)); - for (auto [i, value] : enumerate(terminator->getOperands())) { - // Test if this is a `field_reference` op that refers a top-level field. - auto refOp = value.getDefiningOp(); - if (refOp && refOp.getPosition().size() == 1) { - // Test if it refers to the block argument of the `expression` region. 
- auto arg = dyn_cast(refOp.getContainer()); - if (arg && arg.getOwner() == &op.getExpressions().front()) { - // This is a references forwarding a top-level field, so we'll express - // that with an `emit` op reordering the result of this op. - mapping.push_back(refOp.getPosition().front()); - continue; - } - } - - // This is not just a forwarding an input field, so we keep it. - mapping.push_back(numInputFields + newYields.size()); - newYields.push_back(value); - } - - if (newYields.size() == terminator->getNumOperands()) - return rewriter.notifyMatchFailure( - op, "does not yield unmodified input fields"); - - // Change the `yield` op to yield only those values we want to keep. - { - PatternRewriter::InsertionGuard guard(rewriter); - rewriter.setInsertionPointAfter(terminator); - terminator = rewriter.replaceOpWithNewOp(terminator, newYields); - } - - // Compute deduplicated output field types. - SmallVector outputTypes; - int64_t numNewYields = terminator->getNumOperands(); - outputTypes.reserve(inputTupleType.size() + numNewYields); - append_range(outputTypes, inputTupleType.getTypes()); - append_range(outputTypes, terminator->getOperandTypes()); - auto newOutputType = TupleType::get(context, outputTypes); - - // Create new `project` op with updated region. - auto newOp = - rewriter.create(op.getLoc(), newOutputType, op.getInput()); - rewriter.inlineRegionBefore(op.getExpressions(), newOp.getExpressions(), - newOp.getExpressions().end()); - - // Create `emit` op with a mapping that recreates the fields we removed. - ArrayAttr reverseMappingAttr = rewriter.getI64ArrayAttr(mapping); - rewriter.replaceOpWithNewOp(op, newOp, reverseMappingAttr); - - return success(); - } -}; - -/// Pushes duplicates in the mappings of `emit` ops producing either of the two -/// inputs through the `cross` op. 
This works by introducing new emit ops -/// without the duplicates, creating a new `cross` op that uses them, and -/// finally a new `emit` op that maps back to the original order. -struct PushDuplicatesThroughCrossPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(CrossOp op, - PatternRewriter &rewriter) const override { - bool isLeftEmit = isa_and_present(op.getLeft().getDefiningOp()); - bool isRightEmit = isa_and_present(op.getRight().getDefiningOp()); - if (!isLeftEmit && !isRightEmit) - return rewriter.notifyMatchFailure( - op, "none of the operands is an 'emit' op"); - - // Create input ops for the new `cross` op. These may be the original inputs - // or `emit` ops that remove duplicates. - - // Left input: the reverse mapping of the left input works as the prefix of - // the reverse mapping of the new `cross` op. - SmallVector reverseMapping; - auto [newLeftInput, numLeftIndices, leftHasDuplicates] = - createDeduplicatingEmit(op.getLeft(), reverseMapping, rewriter); - - // Right input: the reverse mapping of the right input needs to be adjusted - // by the number of deduplicated fields in the left input. - int64_t numLeftOriginalindices = reverseMapping.size(); - auto [newRightInput, numRightIndices, rightHasDuplicates] = - createDeduplicatingEmit(op.getRight(), reverseMapping, rewriter); - for (int64_t &idx : drop_begin(reverseMapping, numLeftOriginalindices)) - idx += numLeftIndices; - - if (!leftHasDuplicates && !rightHasDuplicates) - // Note: if we end up failing here, then both invocations of - // `createDeduplicatingEmit` returned without creating a new (`emit`) op. - return rewriter.notifyMatchFailure( - op, "none of the 'emit' inputs have duplicates"); - - // Create new cross op with the two deduplicated inputs. - auto newOp = - rewriter.create(op.getLoc(), newLeftInput, newRightInput); - - // Replace old cross op with emit op that maps back to old emit order. 
- ArrayAttr reverseMappingAttr = rewriter.getI64ArrayAttr(reverseMapping); - rewriter.replaceOpWithNewOp(op, newOp, reverseMappingAttr); - - return success(); - } -}; - -/// Pushes duplicates in the mappings of `emit` ops producing the input through -/// the `filter` op. This works by introducing a new `emit` op without the -/// duplicates, creating a new `filter` op updated to work on the deduplicated -/// element type, and finally a new `emit` op that maps back to the original -/// order. -struct PushDuplicatesThroughFilterPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(FilterOp op, - PatternRewriter &rewriter) const override { - auto emitOp = op.getInput().getDefiningOp(); - if (!emitOp) - return rewriter.notifyMatchFailure( - op, "input operand is not produced by an 'emit' op"); - - // Create input ops for the new `filter` op. These may be the original - // inputs or `emit` ops that remove duplicates. - SmallVector reverseMapping; - auto [newInput, numDedupIndices, hasDuplicates] = - createDeduplicatingEmit(op.getInput(), reverseMapping, rewriter); - - if (!hasDuplicates) - // Note: if we end up failing here, then the invokation of - // `createDeduplicatingEmit` returned without creating a new (`emit`) op. - return rewriter.notifyMatchFailure( - op, "the 'emit' input does not have duplicates"); - - // Create new `filter` op. Move over the `condition` region. This needs to - // happen now because replacing the op will destroy the region. - auto newOp = rewriter.create(op.getLoc(), newInput); - rewriter.inlineRegionBefore(op.getCondition(), newOp.getCondition(), - newOp.getCondition().end()); - - // Update the `condition` region. - deduplicateRegionArgs(newOp.getCondition(), emitOp.getMapping(), - newInput.getType(), rewriter); - - // Deduplicating block args may create common subexpressions. Eliminate - // them immediately. 
- { - DominanceInfo domInfo; - mlir::eliminateCommonSubExpressions(rewriter, domInfo, newOp); - } - - // Replace the old `filter` op with a new `emit` op that maps back to the - // original emit order. - ArrayAttr reverseMappingAttr = rewriter.getI64ArrayAttr(reverseMapping); - rewriter.replaceOpWithNewOp(op, newOp, reverseMappingAttr); - - return failure(); - } -}; - -/// Pushes duplicates in the mappings of `emit` ops producing the input through -/// the `filter` op. This works by introducing a new `emit` op without the -/// duplicates, creating a new `filter` op updated to work on the deduplicated -/// element type, and finally a new `emit` op that maps back to the original -/// order. -struct PushDuplicatesThroughProjectPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(ProjectOp op, - PatternRewriter &rewriter) const override { - auto emitOp = op.getInput().getDefiningOp(); - if (!emitOp) - return rewriter.notifyMatchFailure( - op, "input operand is not produced by an 'emit' op"); - - // Create input ops for the new `project` op. These may be the original - // inputs or `emit` ops that remove duplicates. - SmallVector reverseMapping; - auto [newInput, numDedupIndices, hasDuplicates] = - createDeduplicatingEmit(op.getInput(), reverseMapping, rewriter); - - if (!hasDuplicates) - // Note: if we end up failing here, then the invokation of - // `createDeduplicatingEmit` returned without creating a new (`emit`) op. - return rewriter.notifyMatchFailure( - op, "the 'emit' input does not have duplicates"); - - MLIRContext *context = op.getContext(); - - // Compute deduplicated output field types. 
- Operation *terminator = op.getExpressions().front().getTerminator(); - auto newInputTupleType = cast(newInput.getType()); - - SmallVector outputTypes; - outputTypes.reserve(newInputTupleType.size() + - terminator->getNumOperands()); - append_range(outputTypes, newInputTupleType.getTypes()); - append_range(outputTypes, terminator->getOperandTypes()); - auto newOutputType = TupleType::get(context, outputTypes); - - // Create new `project` op. Move over the `expressions` region. This needs - // to happen now because replacing the op will destroy the region. - auto newOp = - rewriter.create(op.getLoc(), newOutputType, newInput); - rewriter.inlineRegionBefore(op.getExpressions(), newOp.getExpressions(), - newOp.getExpressions().end()); - - // Update the `condition` region. - deduplicateRegionArgs(newOp.getExpressions(), emitOp.getMapping(), - newInput.getType(), rewriter); - - // Deduplicating block args may create common subexpressions. Eliminate - // them immediately. - { - DominanceInfo domInfo; - mlir::eliminateCommonSubExpressions(rewriter, domInfo, newOp); - } - - // Compute output indices for the expressions added by the region. - int64_t numTotalIndices = numDedupIndices + terminator->getNumOperands(); - append_range(reverseMapping, seq(numDedupIndices, numTotalIndices)); - - // Replace the old `project` op with a new `emit` op that maps back to the - // original emit order. 
- ArrayAttr reverseMappingAttr = rewriter.getI64ArrayAttr(reverseMapping); - rewriter.replaceOpWithNewOp(op, newOp, reverseMappingAttr); - - return failure(); - } -}; - -} // namespace - -namespace mlir { -namespace substrait { - -void populateEmitDeduplicationPatterns(RewritePatternSet &patterns) { - MLIRContext *context = patterns.getContext(); - patterns.add< - // clang-format off - EliminateDuplicateYieldsInProjectPattern, - EliminateIdentityYieldsInProjectPattern, - PushDuplicatesThroughCrossPattern, - PushDuplicatesThroughFilterPattern, - PushDuplicatesThroughProjectPattern - // clang-format on - >(context); -} - -std::unique_ptr createEmitDeduplicationPass() { - return std::make_unique(); -} - -} // namespace substrait -} // namespace mlir diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt deleted file mode 100644 index cace296c633c..000000000000 --- a/lib/Target/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(SubstraitPB) diff --git a/lib/Target/SubstraitPB/CMakeLists.txt b/lib/Target/SubstraitPB/CMakeLists.txt deleted file mode 100644 index 4e0301e69010..000000000000 --- a/lib/Target/SubstraitPB/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -add_mlir_translation_library(MLIRTargetSubstraitPB - Export.cpp - Import.cpp - ProtobufUtils.cpp - - LINK_LIBS PUBLIC - MLIRIR - MLIRSubstraitDialect - MLIRSupport - MLIRTranslateLib - substrait_proto - protobuf::libprotobuf - ) diff --git a/lib/Target/SubstraitPB/Export.cpp b/lib/Target/SubstraitPB/Export.cpp deleted file mode 100644 index b58ebf099e93..000000000000 --- a/lib/Target/SubstraitPB/Export.cpp +++ /dev/null @@ -1,803 +0,0 @@ -//===-- Export.cpp - Export Substrait dialect to protobuf -------*- C++ -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "structured/Target/SubstraitPB/Export.h" -#include "ProtobufUtils.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/Support/LogicalResult.h" -#include "structured/Dialect/Substrait/IR/Substrait.h" -#include "structured/Target/SubstraitPB/Options.h" -#include "llvm/ADT/TypeSwitch.h" - -#include -#include -#include -#include -#include -#include - -using namespace mlir; -using namespace mlir::substrait; -using namespace ::substrait; -using namespace ::substrait::proto; - -namespace pb = google::protobuf; - -namespace { - -/// Main structure to drive export from the dialect to protobuf. This class -/// holds the visitor functions for the various ops etc. from the dialect as -/// well as state and utilities around the state that is built up during export. -class SubstraitExporter { -public: -// Declaration for the export function of the given operation type. -// -// We need one such function for most op type that we want to export. The -// `MESSAGE_TYPE` argument corresponds to the protobuf message type returned -// by the function. -#define DECLARE_EXPORT_FUNC(OP_TYPE, MESSAGE_TYPE) \ - FailureOr> exportOperation(OP_TYPE op); - - DECLARE_EXPORT_FUNC(CallOp, Expression) - DECLARE_EXPORT_FUNC(CrossOp, Rel) - DECLARE_EXPORT_FUNC(EmitOp, Rel) - DECLARE_EXPORT_FUNC(ExpressionOpInterface, Expression) - DECLARE_EXPORT_FUNC(FieldReferenceOp, Expression) - DECLARE_EXPORT_FUNC(FilterOp, Rel) - DECLARE_EXPORT_FUNC(LiteralOp, Expression) - DECLARE_EXPORT_FUNC(ModuleOp, Plan) - DECLARE_EXPORT_FUNC(NamedTableOp, Rel) - DECLARE_EXPORT_FUNC(PlanOp, Plan) - DECLARE_EXPORT_FUNC(ProjectOp, Rel) - DECLARE_EXPORT_FUNC(RelOpInterface, Rel) - - FailureOr> exportOperation(Operation *op); - FailureOr> exportType(Location loc, - mlir::Type mlirType); - -private: - /// Returns the nearest symbol table to op. 
The symbol table is cached in - /// `this` such that repeated calls that request the same symbol do not - /// rebuild that table. - SymbolTable &getSymbolTableFor(Operation *op) { - Operation *nearestSymbolTableOp = SymbolTable::getNearestSymbolTable(op); - if (!symbolTable || symbolTable->getOp() != nearestSymbolTableOp) { - symbolTable = std::make_unique(nearestSymbolTableOp); - } - return *symbolTable; - } - - /// Looks up the anchor value corresponding to the given symbol name in the - /// context of the given op. The op is used to determine which symbol table - /// was used to assign anchors. - template - int32_t lookupAnchor(Operation *contextOp, const SymNameType &symName) { - SymbolTable &symbolTable = getSymbolTableFor(contextOp); - Operation *calleeOp = symbolTable.lookup(symName); - return anchorsByOp.at(calleeOp); - } - - DenseMap anchorsByOp{}; // Maps anchors to ops. - std::unique_ptr symbolTable; // Symbol table cache. -}; - -FailureOr> -SubstraitExporter::exportType(Location loc, mlir::Type mlirType) { - MLIRContext *context = mlirType.getContext(); - - // Handle SI1. - auto si1 = IntegerType::get(context, 1, IntegerType::Signed); - if (mlirType == si1) { - // TODO(ingomueller): support other nullability modes. - auto i1Type = std::make_unique(); - i1Type->set_nullability( - Type_Nullability::Type_Nullability_NULLABILITY_REQUIRED); - - auto type = std::make_unique(); - type->set_allocated_bool_(i1Type.release()); - return std::move(type); - } - - // Handle SI32. - auto si32 = IntegerType::get(context, 32, IntegerType::Signed); - if (mlirType == si32) { - // TODO(ingomueller): support other nullability modes. 
- auto i32Type = std::make_unique(); - i32Type->set_nullability( - Type_Nullability::Type_Nullability_NULLABILITY_REQUIRED); - - auto type = std::make_unique(); - type->set_allocated_i32(i32Type.release()); - return std::move(type); - } - - if (auto tupleType = llvm::dyn_cast(mlirType)) { - auto structType = std::make_unique(); - for (mlir::Type fieldType : tupleType.getTypes()) { - // Convert field type recursively. - FailureOr> type = exportType(loc, fieldType); - if (failed(type)) - return failure(); - *structType->add_types() = *type.value(); - } - - auto type = std::make_unique(); - type->set_allocated_struct_(structType.release()); - return std::move(type); - } - - // TODO(ingomueller): Support other types. - return emitError(loc) << "could not export unsupported type " << mlirType; -} - -FailureOr> -SubstraitExporter::exportOperation(CallOp op) { - using ScalarFunction = Expression::ScalarFunction; - - Location loc = op.getLoc(); - - // Build `ScalarFunction` message. - // TODO(ingomueller): Support other `*Function` messages. - auto scalarFunction = std::make_unique(); - int32_t anchor = lookupAnchor(op, op.getCallee()); - scalarFunction->set_function_reference(anchor); - - // Build messages for arguments. - for (auto [i, operand] : llvm::enumerate(op->getOperands())) { - // Build `Expression` message for operand. - auto definingOp = llvm::dyn_cast_if_present( - operand.getDefiningOp()); - if (!definingOp) - return op->emitOpError() - << "with operand " << i - << " that was not produced by Substrait relation op"; - - FailureOr> expression = - exportOperation(definingOp); - if (failed(expression)) - return failure(); - - // Build `FunctionArgument` message and add to arguments. - FunctionArgument arg; - arg.set_allocated_value(expression->release()); - *scalarFunction->add_arguments() = arg; - } - - // Build message for `output_type`. 
- FailureOr> outputType = - exportType(loc, op.getResult().getType()); - if (failed(outputType)) - return failure(); - scalarFunction->set_allocated_output_type(outputType->release()); - - // Build `Expression` message. - auto expression = std::make_unique(); - expression->set_allocated_scalar_function(scalarFunction.release()); - - return expression; -} - -FailureOr> SubstraitExporter::exportOperation(CrossOp op) { - // Build `RelCommon` message. - auto relCommon = std::make_unique(); - auto direct = std::make_unique(); - relCommon->set_allocated_direct(direct.release()); - - // Build `left` input message. - auto leftOp = - llvm::dyn_cast_if_present(op.getLeft().getDefiningOp()); - if (!leftOp) - return op->emitOpError( - "left input was not produced by Substrait relation op"); - - FailureOr> leftRel = exportOperation(leftOp); - if (failed(leftRel)) - return failure(); - - // Build `right` input message. - auto rightOp = - llvm::dyn_cast_if_present(op.getRight().getDefiningOp()); - if (!rightOp) - return op->emitOpError( - "right input was not produced by Substrait relation op"); - - FailureOr> rightRel = exportOperation(rightOp); - if (failed(rightRel)) - return failure(); - - // Build `CrossRel` message. - auto crossRel = std::make_unique(); - crossRel->set_allocated_common(relCommon.release()); - crossRel->set_allocated_left(leftRel->release()); - crossRel->set_allocated_right(rightRel->release()); - - // Build `Rel` message. - auto rel = std::make_unique(); - rel->set_allocated_cross(crossRel.release()); - - return rel; -} - -FailureOr> SubstraitExporter::exportOperation(EmitOp op) { - auto inputOp = - dyn_cast_if_present(op.getInput().getDefiningOp()); - if (!inputOp) - return op->emitOpError( - "has input that was not produced by Substrait relation op"); - - // Export input op. - FailureOr> inputRel = exportOperation(inputOp); - if (failed(inputRel)) - return failure(); - - // Build the `emit` message. 
- auto emit = std::make_unique(); - for (auto intAttr : op.getMapping().getAsRange()) - emit->add_output_mapping(intAttr.getInt()); - - // Attach the `emit` message to the `RelCommon` message. - FailureOr relCommon = - protobuf_utils::getMutableCommon(inputRel->get(), op.getLoc()); - if (failed(relCommon)) - return failure(); - - if (relCommon.value()->has_emit()) { - InFlightDiagnostic diag = - op->emitOpError("has 'input' that already has 'emit' message " - "(try running canonicalization?)"); - diag.attachNote(inputOp.getLoc()) << "op exported to 'input' message"; - return diag; - } - - relCommon.value()->set_allocated_emit(emit.release()); - - return inputRel; -} - -FailureOr> -SubstraitExporter::exportOperation(ExpressionOpInterface op) { - return llvm::TypeSwitch>>( - op) - .Case( - [&](auto op) { return exportOperation(op); }) - .Default( - [](auto op) { return op->emitOpError("not supported for export"); }); -} - -FailureOr> -SubstraitExporter::exportOperation(FieldReferenceOp op) { - using FieldReference = Expression::FieldReference; - using ReferenceSegment = Expression::ReferenceSegment; - - // Build linked list of `ReferenceSegment` messages. - // TODO: support masked references. - std::unique_ptr referenceRoot; - for (int64_t pos : llvm::reverse(op.getPosition())) { - // Remember child segment and create new `ReferenceSegment` message. - auto childReference = std::move(referenceRoot); - referenceRoot = std::make_unique(); - - // Create `StructField` message. - // TODO(ingomueller): support other segment types. - auto structField = std::make_unique(); - structField->set_field(pos); - structField->set_allocated_child(childReference.release()); - - referenceRoot->set_allocated_struct_field(structField.release()); - } - - // Build `FieldReference` message. - auto fieldReference = std::make_unique(); - fieldReference->set_allocated_direct_reference(referenceRoot.release()); - - // Handle different `root_type`s. 
- Value inputVal = op.getContainer(); - if (Operation *definingOp = inputVal.getDefiningOp()) { - // If there is a defining op, the `root_type` is an `Expression`. - ExpressionOpInterface exprOp = - llvm::dyn_cast(definingOp); - if (!exprOp) - return op->emitOpError("has 'container' operand that was not produced by " - "Substrait expression"); - - FailureOr> expression = exportOperation(exprOp); - if (failed(expression)) - return failure(); - - fieldReference->set_allocated_expression(expression->release()); - } else { - // Input must be a `BlockArgument`. Only support root references for now. - auto blockArg = llvm::cast(inputVal); - if (blockArg.getOwner() != op->getBlock()) - // TODO(ingomueller): support outer reference type. - return op.emitOpError("has unsupported outer reference"); - - auto rootReference = std::make_unique(); - fieldReference->set_allocated_root_reference(rootReference.release()); - } - - // Build `Expression` message. - auto expression = std::make_unique(); - expression->set_allocated_selection(fieldReference.release()); - - return expression; -} - -FailureOr> -SubstraitExporter::exportOperation(FilterOp op) { - // Build `RelCommon` message. - auto relCommon = std::make_unique(); - auto direct = std::make_unique(); - relCommon->set_allocated_direct(direct.release()); - - // Build input `Rel` message. - auto inputOp = - llvm::dyn_cast_if_present(op.getInput().getDefiningOp()); - if (!inputOp) - return op->emitOpError("input was not produced by Substrait relation op"); - - FailureOr> inputRel = exportOperation(inputOp); - if (failed(inputRel)) - return failure(); - - // Build condition `Expression` message. - auto yieldOp = llvm::cast(op.getCondition().front().getTerminator()); - // TODO(ingomueller): There can be cases where there isn't a defining op but - // the region argument is returned directly. Support that. 
- assert(yieldOp.getValue().size() == 1 && - "fitler op must yield exactly one value"); - auto conditionOp = llvm::dyn_cast_if_present( - yieldOp.getValue().front().getDefiningOp()); - if (!conditionOp) - return op->emitOpError("condition not supported for export: yielded op was " - "not produced by Substrait expression op"); - FailureOr> condition = - exportOperation(conditionOp); - if (failed(condition)) - return failure(); - - // Build `FilterRel` message. - auto filterRel = std::make_unique(); - filterRel->set_allocated_common(relCommon.release()); - filterRel->set_allocated_input(inputRel->release()); - filterRel->set_allocated_condition(condition->release()); - - // Build `Rel` message. - auto rel = std::make_unique(); - rel->set_allocated_filter(filterRel.release()); - - return rel; -} - -FailureOr> -SubstraitExporter::exportOperation(LiteralOp op) { - // Build `Literal` message depending on type. - auto value = llvm::cast(op.getValue()); - mlir::Type literalType = value.getType(); - auto literal = std::make_unique(); - - // `IntegerType`s. - if (auto intType = dyn_cast(literalType)) { - if (!intType.isSigned()) - op->emitOpError("has integer value with unsupported signedness"); - switch (intType.getWidth()) { - case 1: - literal->set_boolean(value.cast().getSInt()); - break; - case 32: - // TODO(ingomueller): Add tests when we can express plans that use i32. - literal->set_i32(value.cast().getSInt()); - break; - default: - op->emitOpError("has integer value with unsupported width"); - } - } else - op->emitOpError("has unsupported value"); - - // Build `Expression` message. 
- auto expression = std::make_unique(); - expression->set_allocated_literal(literal.release()); - - return expression; -} - -FailureOr> -SubstraitExporter::exportOperation(ModuleOp op) { - if (!op->getAttrs().empty()) { - op->emitOpError("has attributes"); - return failure(); - } - - Region &body = op.getBodyRegion(); - if (llvm::range_size(body.getOps()) != 1) { - op->emitOpError("has more than one op in its body"); - return failure(); - } - - if (auto plan = llvm::dyn_cast(&*body.op_begin())) - return exportOperation(plan); - - op->emitOpError("contains an op that is not a 'substrait.plan'"); - return failure(); -} - -FailureOr> -SubstraitExporter::exportOperation(NamedTableOp op) { - Location loc = op.getLoc(); - - // Build `NamedTable` message. - auto namedTable = std::make_unique(); - namedTable->add_names(op.getTableName().getRootReference().str()); - for (SymbolRefAttr attr : op.getTableName().getNestedReferences()) { - namedTable->add_names(attr.getLeafReference().str()); - } - - // Build `RelCommon` message. - auto relCommon = std::make_unique(); - auto direct = std::make_unique(); - relCommon->set_allocated_direct(direct.release()); - - // Build `Struct` message. - auto struct_ = std::make_unique(); - struct_->set_nullability( - Type_Nullability::Type_Nullability_NULLABILITY_REQUIRED); - auto tupleType = llvm::cast(op.getResult().getType()); - for (mlir::Type fieldType : tupleType.getTypes()) { - FailureOr> type = exportType(loc, fieldType); - if (failed(type)) - return (failure()); - *struct_->add_types() = *std::move(type.value()); - } - - // Build `NamedStruct` message. - auto namedStruct = std::make_unique(); - namedStruct->set_allocated_struct_(struct_.release()); - for (Attribute attr : op.getFieldNames()) { - namedStruct->add_names(attr.cast().getValue().str()); - } - - // Build `ReadRel` message. 
- auto readRel = std::make_unique(); - readRel->set_allocated_common(relCommon.release()); - readRel->set_allocated_base_schema(namedStruct.release()); - readRel->set_allocated_named_table(namedTable.release()); - - // Build `Rel` message. - auto rel = std::make_unique(); - rel->set_allocated_read(readRel.release()); - - return rel; -} - -/// Helper for creating unique anchors from symbol names. While in MLIR, symbol -/// names and their references are strings, in Substrait they are integer -/// numbers. In order to preserve the anchor values through an import/export -/// process (without modifications), the symbol names generated during import -/// have the form `.` such that the `anchor` value can be -/// recovered. During assigning of anchors, the uniquer fills a map mapping the -/// symbol ops to the assigned anchor values such that uses of the symbol can -/// look them up. -class AnchorUniquer { -public: - AnchorUniquer(StringRef prefix, DenseMap &anchorsByOp) - : prefix(prefix), anchorsByOp(anchorsByOp) {} - - /// Assign a unique anchor to the given op and register the result in the - /// mapping. - template - int32_t assignAnchor(OpTy op) { - StringRef symName = op.getSymName(); - int32_t anchor; - { - // Attempt to recover the anchor from the symbol name. - if (!symName.starts_with(prefix) || - symName.drop_front(prefix.size()).getAsInteger(10, anchor)) { - // If that fails, find one that isn't used yet. - anchor = nextAnchor; - } - // Ensure uniqueness either way. - while (anchors.contains(anchor)) - anchor = nextAnchor++; - } - anchors.insert(anchor); - auto [_, hasInserted] = anchorsByOp.try_emplace(op, anchor); - assert(hasInserted && "op had already been assigned an anchor"); - return anchor; - } - -private: - StringRef prefix; - DenseMap &anchorsByOp; // Maps ops to anchor values. - DenseSet anchors; // Already assigned anchors. - int32_t nextAnchor{0}; // Next anchor candidate. 
-}; - -/// Traits for common handling of `ExtensionFunctionOp`, `ExtensionTypeOp`, and -/// `ExtensionTypeVariationOp`. While their corresponding protobuf message types -/// are structurally the same, they are (1) different classes and (2) have -/// different field names. The Trait thus provides the message type class as -/// well as accessors for that class for each of the op types. -template -struct ExtensionOpTraits; - -template <> -struct ExtensionOpTraits { - using ExtensionMessageType = - extensions::SimpleExtensionDeclaration::ExtensionFunction; - static void setAnchor(ExtensionMessageType &ext, int32_t anchor) { - ext.set_function_anchor(anchor); - } - static ExtensionMessageType * - getMutableExtension(extensions::SimpleExtensionDeclaration &decl) { - return decl.mutable_extension_function(); - } -}; - -template <> -struct ExtensionOpTraits { - using ExtensionMessageType = - extensions::SimpleExtensionDeclaration::ExtensionType; - static void setAnchor(ExtensionMessageType &ext, int32_t anchor) { - ext.set_type_anchor(anchor); - } - static ExtensionMessageType * - getMutableExtension(extensions::SimpleExtensionDeclaration &decl) { - return decl.mutable_extension_type(); - } -}; - -template <> -struct ExtensionOpTraits { - using ExtensionMessageType = - extensions::SimpleExtensionDeclaration::ExtensionTypeVariation; - static void setAnchor(ExtensionMessageType &ext, int32_t anchor) { - ext.set_type_variation_anchor(anchor); - } - static ExtensionMessageType * - getMutableExtension(extensions::SimpleExtensionDeclaration &decl) { - return decl.mutable_extension_type_variation(); - } -}; - -FailureOr> SubstraitExporter::exportOperation(PlanOp op) { - using extensions::SimpleExtensionDeclaration; - using extensions::SimpleExtensionURI; - - // Build `Version` message. 
- auto version = std::make_unique(); - version->set_major_number(op.getMajorNumber()); - version->set_minor_number(op.getMinorNumber()); - version->set_patch_number(op.getPatchNumber()); - version->set_producer(op.getProducer().str()); - version->set_git_hash(op.getGitHash().str()); - - // Build `Plan` message. - auto plan = std::make_unique(); - plan->set_allocated_version(version.release()); - - // Add `extension_uris` to plan. - { - AnchorUniquer anchorUniquer("extension_uri.", anchorsByOp); - for (auto uriOp : op.getOps()) { - int32_t anchor = anchorUniquer.assignAnchor(uriOp); - - // Create `SimpleExtensionURI` message. - SimpleExtensionURI *uri = plan->add_extension_uris(); - uri->set_uri(uriOp.getUri().str()); - uri->set_extension_uri_anchor(anchor); - } - } - - // Add `extensions` to plan. This requires the URIs to exist. - { - // Each extension type has its own anchor uniquer. - AnchorUniquer funcUniquer("extension_function.", anchorsByOp); - AnchorUniquer typeUniquer("extension_type.", anchorsByOp); - AnchorUniquer typeVarUniquer("extension_type_variation.", anchorsByOp); - - // Export an op of a given type using the corresponding uniquer. - auto exportExtensionOperation = [&](AnchorUniquer *uniquer, auto extOp) { - using OpTy = decltype(extOp); - using OpTraits = ExtensionOpTraits; - - // Compute URI reference and anchor value. - int32_t uriReference = lookupAnchor(op, extOp.getUri()); - int32_t anchor = uniquer->assignAnchor(extOp); - - // Create `SimpleExtensionDeclaration` and extension-specific messages. - typename OpTraits::ExtensionMessageType ext; - OpTraits::setAnchor(ext, anchor); - ext.set_extension_uri_reference(uriReference); - ext.set_name(extOp.getName().str()); - SimpleExtensionDeclaration *decl = plan->add_extensions(); - *OpTraits::getMutableExtension(*decl) = ext; - }; - - // Iterate over the different types of extension ops. 
This must be a single - // loop in order to preserve the order, which allows for interleaving of - // different types in both the protobuf and the MLIR form. - for (Operation &extOp : op.getOps()) { - TypeSwitch(extOp) - .Case([&](auto extOp) { - exportExtensionOperation(&funcUniquer, extOp); - }) - .Case([&](auto extOp) { - exportExtensionOperation(&typeUniquer, extOp); - }) - .Case([&](auto extOp) { - exportExtensionOperation(&typeVarUniquer, extOp); - }); - } - } - - // Add `relation`s to plan. - for (auto relOp : op.getOps()) { - Operation *terminator = relOp.getBody().front().getTerminator(); - auto rootOp = - llvm::cast(terminator->getOperand(0).getDefiningOp()); - - FailureOr> rel = exportOperation(rootOp); - if (failed(rel)) - return failure(); - - // Handle `Rel`/`RelRoot` cases depending on whether `names` is set. - PlanRel *planRel = plan->add_relations(); - if (std::optional names = relOp.getFieldNames()) { - auto root = std::make_unique(); - root->set_allocated_input(rel->release()); - - auto namesArray = cast(names.value()).getAsRange(); - for (StringAttr name : namesArray) { - root->add_names(name.getValue().str()); - } - - planRel->set_allocated_root(root.release()); - } else { - planRel->set_allocated_rel(rel->release()); - } - } - - return std::move(plan); -} - -FailureOr> -SubstraitExporter::exportOperation(ProjectOp op) { - // Build `RelCommon` message. - auto relCommon = std::make_unique(); - auto direct = std::make_unique(); - relCommon->set_allocated_direct(direct.release()); - - // Build input `Rel` message. - auto inputOp = - llvm::dyn_cast_if_present(op.getInput().getDefiningOp()); - if (!inputOp) - return op->emitOpError("input was not produced by Substrait relation op"); - - FailureOr> inputRel = exportOperation(inputOp); - if (failed(inputRel)) - return failure(); - - // Build `ProjectRel` message. 
- auto projectRel = std::make_unique(); - projectRel->set_allocated_common(relCommon.release()); - projectRel->set_allocated_input(inputRel->release()); - - // Build `Expression` messages. - auto yieldOp = - llvm::cast(op.getExpressions().front().getTerminator()); - for (Value val : yieldOp.getValue()) { - // Make sure the yielded value was produced by an expression op. - auto exprRootOp = - llvm::dyn_cast_if_present(val.getDefiningOp()); - if (!exprRootOp) - return op->emitOpError( - "expression not supported for export: yielded op was " - "not produced by Substrait expression op"); - - // Export the expression recursively. - FailureOr> expression = - exportOperation(exprRootOp); - if (failed(expression)) - return failure(); - - // Add the expression to the `ProjectRel` message. - *projectRel->add_expressions() = *expression.value(); - } - - // Build `Rel` message. - auto rel = std::make_unique(); - rel->set_allocated_project(projectRel.release()); - - return rel; -} - -FailureOr> -SubstraitExporter::exportOperation(RelOpInterface op) { - return llvm::TypeSwitch>>(op) - .Case< - // clang-format off - CrossOp, - EmitOp, - FieldReferenceOp, - FilterOp, - NamedTableOp, - ProjectOp - // clang-format on - >([&](auto op) { return exportOperation(op); }) - .Default([](auto op) { - op->emitOpError("not supported for export"); - return failure(); - }); -} - -FailureOr> -SubstraitExporter::exportOperation(Operation *op) { - return llvm::TypeSwitch>>( - op) - .Case( - [&](auto op) -> FailureOr> { - auto typedMessage = exportOperation(op); - if (failed(typedMessage)) - return failure(); - return std::unique_ptr(typedMessage.value().release()); - }) - .Default([](auto op) { - op->emitOpError("not supported for export"); - return failure(); - }); -} - -} // namespace - -namespace mlir { -namespace substrait { - -LogicalResult -translateSubstraitToProtobuf(Operation *op, llvm::raw_ostream &output, - substrait::ImportExportOptions options) { - SubstraitExporter exporter; - 
FailureOr> result = exporter.exportOperation(op); - if (failed(result)) - return failure(); - - std::string out; - switch (options.serdeFormat) { - case substrait::SerdeFormat::kText: - if (!pb::TextFormat::PrintToString(*result.value(), &out)) { - op->emitOpError("could not be serialized to text format"); - return failure(); - } - break; - case substrait::SerdeFormat::kBinary: - if (!result->get()->SerializeToString(&out)) { - op->emitOpError("could not be serialized to binary format"); - return failure(); - } - break; - case substrait::SerdeFormat::kJson: - case substrait::SerdeFormat::kPrettyJson: { - pb::util::JsonOptions jsonOptions; - if (options.serdeFormat == SerdeFormat::kPrettyJson) - jsonOptions.add_whitespace = true; - pb::util::Status status = - pb::util::MessageToJsonString(*result.value(), &out, jsonOptions); - if (!status.ok()) { - InFlightDiagnostic diag = - op->emitOpError("could not be serialized to JSON format"); - diag.attachNote() << status.message(); - return diag; - } - } - } - - output << out; - return success(); -} - -} // namespace substrait -} // namespace mlir diff --git a/lib/Target/SubstraitPB/Import.cpp b/lib/Target/SubstraitPB/Import.cpp deleted file mode 100644 index 81569061c9e5..000000000000 --- a/lib/Target/SubstraitPB/Import.cpp +++ /dev/null @@ -1,677 +0,0 @@ -//===-- Import.cpp - Import protobuf to Substrait dialect -------*- C++ -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "structured/Target/SubstraitPB/Import.h" - -#include "ProtobufUtils.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/ImplicitLocOpBuilder.h" -#include "mlir/IR/OwningOpRef.h" -#include "structured/Dialect/Substrait/IR/Substrait.h" -#include "structured/Target/SubstraitPB/Options.h" - -#include -#include -#include -#include -#include -#include -#include - -using namespace mlir; -using namespace mlir::substrait; -using namespace ::substrait; -using namespace ::substrait::proto; - -namespace pb = google::protobuf; - -namespace { - -// Forward declaration for the import function of the given message type. -// -// We need one such function for most message types that we want to import. The -// forward declarations are necessary such all import functions are available -// for the definitions indepedently of the order of these definitions. The -// message type passed to the function (specified by `MESSAGE_TYPE`) may be -// different than the one it is responsible for: often the target op type -// (specified by `OP_TYPE`) depends on a nested field value (such as `oneof`) -// but the import logic needs the whole context; the message that is passed in -// is the most deeply nested message that provides the whole context. 
-#define DECLARE_IMPORT_FUNC(MESSAGE_TYPE, ARG_TYPE, OP_TYPE) \ - static FailureOr import##MESSAGE_TYPE(ImplicitLocOpBuilder builder, \ - const ARG_TYPE &message); - -DECLARE_IMPORT_FUNC(CrossRel, Rel, CrossOp) -DECLARE_IMPORT_FUNC(FilterRel, Rel, FilterOp) -DECLARE_IMPORT_FUNC(Expression, Expression, ExpressionOpInterface) -DECLARE_IMPORT_FUNC(FieldReference, Expression::FieldReference, - FieldReferenceOp) -DECLARE_IMPORT_FUNC(Literal, Expression::Literal, LiteralOp) -DECLARE_IMPORT_FUNC(NamedTable, Rel, NamedTableOp) -DECLARE_IMPORT_FUNC(Plan, Plan, PlanOp) -DECLARE_IMPORT_FUNC(PlanRel, PlanRel, PlanRelOp) -DECLARE_IMPORT_FUNC(ProjectRel, Rel, ProjectOp) -DECLARE_IMPORT_FUNC(ReadRel, Rel, RelOpInterface) -DECLARE_IMPORT_FUNC(Rel, Rel, RelOpInterface) -DECLARE_IMPORT_FUNC(ScalarFunction, Expression::ScalarFunction, CallOp) - -// Helpers to build symbol names from anchors deterministically. This allows -// to reate symbol references from anchors without look-up structure. Also, -// the format is exploited by the export logic to recover the original anchor -// values of (unmodified) imported plans. - -/// Builds a deterministic symbol name for an URI with the given anchor. -static std::string buildUriSymName(int32_t anchor) { - return ("extension_uri." + Twine(anchor)).str(); -} - -/// Builds a deterministic symbol name for a function with the given anchor. -static std::string buildFuncSymName(int32_t anchor) { - return ("extension_function." + Twine(anchor)).str(); -} - -/// Builds a deterministic symbol name for a type with the given anchor. -static std::string buildTypeSymName(int32_t anchor) { - return ("extension_type." + Twine(anchor)).str(); -} - -/// Builds a deterministic symbol name for a type variation with the given -/// anchor. -static std::string buildTypeVarSymName(int32_t anchor) { - return ("extension_type_variation." 
+ Twine(anchor)).str(); -} - -static mlir::FailureOr importType(MLIRContext *context, - const proto::Type &type) { - - proto::Type::KindCase kindCase = type.kind_case(); - switch (kindCase) { - case proto::Type::kBool: - return IntegerType::get(context, 1, IntegerType::Signed); - case proto::Type::kI32: - return IntegerType::get(context, 32, IntegerType::Signed); - case proto::Type::kStruct: { - const proto::Type::Struct &structType = type.struct_(); - llvm::SmallVector fieldTypes; - fieldTypes.reserve(structType.types_size()); - for (const proto::Type &fieldType : structType.types()) { - FailureOr mlirFieldType = importType(context, fieldType); - if (failed(mlirFieldType)) - return failure(); - fieldTypes.push_back(mlirFieldType.value()); - } - return TupleType::get(context, fieldTypes); - } - // TODO(ingomueller): Support more types. - default: { - auto loc = UnknownLoc::get(context); - const pb::FieldDescriptor *desc = - proto::Type::GetDescriptor()->FindFieldByNumber(kindCase); - assert(desc && "could not get field descriptor"); - return emitError(loc) << "could not import unsupported type " - << desc->name(); - } - } -} - -static mlir::FailureOr importCrossRel(ImplicitLocOpBuilder builder, - const Rel &message) { - const CrossRel &crossRel = message.cross(); - - // Import left and right inputs. - const Rel &leftRel = crossRel.left(); - const Rel &rightRel = crossRel.right(); - - mlir::FailureOr leftOp = importRel(builder, leftRel); - mlir::FailureOr rightOp = importRel(builder, rightRel); - - if (failed(leftOp) || failed(rightOp)) - return failure(); - - // Build `CrossOp`. 
- Value leftVal = leftOp.value()->getResult(0); - Value rightVal = rightOp.value()->getResult(0); - - return builder.create(leftVal, rightVal); -} - -static mlir::FailureOr -importExpression(ImplicitLocOpBuilder builder, const Expression &message) { - MLIRContext *context = builder.getContext(); - Location loc = UnknownLoc::get(context); - - Expression::RexTypeCase rex_type = message.rex_type_case(); - switch (rex_type) { - case Expression::kLiteral: - return importLiteral(builder, message.literal()); - case Expression::kSelection: - return importFieldReference(builder, message.selection()); - case Expression::kScalarFunction: - return importScalarFunction(builder, message.scalar_function()); - default: { - const pb::FieldDescriptor *desc = - Expression::GetDescriptor()->FindFieldByNumber(rex_type); - return emitError(loc) << Twine("unsupported Expression type: ") + - desc->name(); - } - } -} - -static mlir::FailureOr -importFieldReference(ImplicitLocOpBuilder builder, - const Expression::FieldReference &message) { - using ReferenceSegment = Expression::ReferenceSegment; - - MLIRContext *context = builder.getContext(); - Location loc = UnknownLoc::get(context); - - // Emit error on unsupported cases. - // TODO(ingomueller): support more cases. - if (!message.has_direct_reference()) - return emitError(loc) << "only direct reference supported"; - - // Traverse list to extract indices. - llvm::SmallVector indices; - const ReferenceSegment *currentSegment = &message.direct_reference(); - while (true) { - if (!currentSegment->has_struct_field()) - return emitError(loc) << "only struct fields supported"; - - const ReferenceSegment::StructField &structField = - currentSegment->struct_field(); - indices.push_back(structField.field()); - - // Continue in linked list or end traversal. - if (!structField.has_child()) - break; - currentSegment = &structField.child(); - } - - // Get input value. 
- Value container; - if (message.has_root_reference()) { - // For the `root_reference` case, that's the current block argument. - mlir::Block::BlockArgListType blockArgs = - builder.getInsertionBlock()->getArguments(); - assert(blockArgs.size() == 1 && "expected a single block argument"); - container = blockArgs.front(); - } else if (message.has_expression()) { - // For the `expression` case, recursively import the expression. - FailureOr maybeContainer = - importExpression(builder, message.expression()); - if (failed(maybeContainer)) - return failure(); - container = maybeContainer.value()->getResult(0); - } else { - // For the `outer_reference` case, we need to refer to an argument of some - // outer-level block. - // TODO(ingomueller): support outer references. - assert(message.has_outer_reference() && "unexpected 'root_type` case"); - return emitError(loc) << "outer references not supported"; - } - - // Build and return the op. - return builder.create(container, indices); -} - -static mlir::FailureOr -importLiteral(ImplicitLocOpBuilder builder, - const Expression::Literal &message) { - MLIRContext *context = builder.getContext(); - Location loc = UnknownLoc::get(context); - - Expression::Literal::LiteralTypeCase literalType = - message.literal_type_case(); - switch (literalType) { - case Expression::Literal::LiteralTypeCase::kBoolean: { - auto attr = IntegerAttr::get( - IntegerType::get(context, 1, IntegerType::Signed), message.boolean()); - return builder.create(attr); - } - case Expression::Literal::LiteralTypeCase::kI32: { - auto attr = IntegerAttr::get( - IntegerType::get(context, 32, IntegerType::Signed), message.i32()); - return builder.create(attr); - } - default: { - const pb::FieldDescriptor *desc = - Expression::Literal::GetDescriptor()->FindFieldByNumber(literalType); - return emitError(loc) << Twine("unsupported Literal type: ") + desc->name(); - } - } -} - -static mlir::FailureOr importFilterRel(ImplicitLocOpBuilder builder, - const Rel &message) { 
- const FilterRel &filterRel = message.filter(); - - // Import input op. - const Rel &inputRel = filterRel.input(); - mlir::FailureOr inputOp = importRel(builder, inputRel); - if (failed(inputOp)) - return failure(); - - // Create filter op. - auto filterOp = builder.create(inputOp.value()->getResult(0)); - filterOp.getCondition().push_back(new Block); - Block &conditionBlock = filterOp.getCondition().front(); - conditionBlock.addArgument(filterOp.getResult().getType(), - filterOp->getLoc()); - - // Create condition region. - const Expression &expression = filterRel.condition(); - { - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(&conditionBlock); - - FailureOr conditionOp = - importExpression(builder, expression); - if (failed(conditionOp)) - return failure(); - - builder.create(conditionOp.value()->getResult(0)); - } - - return filterOp; -} - -static mlir::FailureOr -importNamedTable(ImplicitLocOpBuilder builder, const Rel &message) { - const ReadRel &readRel = message.read(); - const ReadRel::NamedTable &namedTable = readRel.named_table(); - MLIRContext *context = builder.getContext(); - - // Assemble table name. - llvm::SmallVector tableNameRefs; - tableNameRefs.reserve(namedTable.names_size()); - for (const std::string &name : namedTable.names()) { - auto attr = FlatSymbolRefAttr::get(context, name); - tableNameRefs.push_back(attr); - } - llvm::ArrayRef tableNameNestedRefs = - llvm::ArrayRef(tableNameRefs).drop_front(); - llvm::StringRef tableNameRootRef = tableNameRefs.front().getValue(); - auto tableName = - SymbolRefAttr::get(context, tableNameRootRef, tableNameNestedRefs); - - // Assemble field names from schema. 
- const NamedStruct &baseSchema = readRel.base_schema(); - llvm::SmallVector fieldNames; - fieldNames.reserve(baseSchema.names_size()); - for (const std::string &name : baseSchema.names()) { - auto attr = StringAttr::get(context, name); - fieldNames.push_back(attr); - } - auto fieldNamesAttr = ArrayAttr::get(context, fieldNames); - - // Assemble field names from schema. - const proto::Type::Struct &struct_ = baseSchema.struct_(); - llvm::SmallVector resultTypes; - resultTypes.reserve(struct_.types_size()); - for (const proto::Type &type : struct_.types()) { - FailureOr mlirType = importType(context, type); - if (failed(mlirType)) - return failure(); - resultTypes.push_back(mlirType.value()); - } - auto resultType = TupleType::get(context, resultTypes); - - // Assemble final op. - auto namedTableOp = - builder.create(resultType, tableName, fieldNamesAttr); - - return namedTableOp; -} - -static FailureOr importPlan(ImplicitLocOpBuilder builder, - const Plan &message) { - using extensions::SimpleExtensionDeclaration; - using extensions::SimpleExtensionURI; - using ExtensionFunction = SimpleExtensionDeclaration::ExtensionFunction; - using ExtensionType = SimpleExtensionDeclaration::ExtensionType; - using ExtensionTypeVariation = - SimpleExtensionDeclaration::ExtensionTypeVariation; - - MLIRContext *context = builder.getContext(); - Location loc = UnknownLoc::get(context); - - const Version &version = message.version(); - auto planOp = builder.create( - version.major_number(), version.minor_number(), version.patch_number(), - version.git_hash(), version.producer()); - planOp.getBody().push_back(new Block()); - - OpBuilder::InsertionGuard insertGuard(builder); - builder.setInsertionPointToEnd(&planOp.getBody().front()); - - // Import `extension_uris` creating symbol names deterministically. 
- for (const SimpleExtensionURI &extUri : message.extension_uris()) { - int32_t anchor = extUri.extension_uri_anchor(); - StringRef uri = extUri.uri(); - std::string symName = buildUriSymName(anchor); - builder.create(symName, uri); - } - - // Import `extension`s reconstructing symbol references to URI ops from the - // corresponding anchors using the same method as above. - for (const SimpleExtensionDeclaration &ext : message.extensions()) { - SimpleExtensionDeclaration::MappingTypeCase mappingCase = - ext.mapping_type_case(); - switch (mappingCase) { - case SimpleExtensionDeclaration::kExtensionFunction: { - const ExtensionFunction &func = ext.extension_function(); - int32_t anchor = func.function_anchor(); - int32_t uriRef = func.extension_uri_reference(); - const std::string &funcName = func.name(); - std::string symName = buildFuncSymName(anchor); - std::string uriSymName = buildUriSymName(uriRef); - builder.create(symName, uriSymName, funcName); - break; - } - case SimpleExtensionDeclaration::kExtensionType: { - const ExtensionType &type = ext.extension_type(); - int32_t anchor = type.type_anchor(); - int32_t uriRef = type.extension_uri_reference(); - const std::string &typeName = type.name(); - std::string symName = buildTypeSymName(anchor); - std::string uriSymName = buildUriSymName(uriRef); - builder.create(symName, uriSymName, typeName); - break; - } - case SimpleExtensionDeclaration::kExtensionTypeVariation: { - const ExtensionTypeVariation &typeVar = ext.extension_type_variation(); - int32_t anchor = typeVar.type_variation_anchor(); - int32_t uriRef = typeVar.extension_uri_reference(); - const std::string &typeVarName = typeVar.name(); - std::string symName = buildTypeVarSymName(anchor); - std::string uriSymName = buildUriSymName(uriRef); - builder.create(symName, uriSymName, - typeVarName); - break; - } - default: - const pb::FieldDescriptor *desc = - SimpleExtensionDeclaration::GetDescriptor()->FindFieldByNumber( - mappingCase); - return 
emitError(loc) - << Twine("unsupported SimpleExtensionDeclaration type: ") + - desc->name(); - } - } - - for (const PlanRel &relation : message.relations()) { - if (failed(importPlanRel(builder, relation))) - return failure(); - } - - return planOp; -} - -static FailureOr importPlanRel(ImplicitLocOpBuilder builder, - const PlanRel &message) { - MLIRContext *context = builder.getContext(); - Location loc = UnknownLoc::get(context); - - if (!message.has_rel() && !message.has_root()) { - PlanRel::RelTypeCase relType = message.rel_type_case(); - const pb::FieldDescriptor *desc = - PlanRel::GetDescriptor()->FindFieldByNumber(relType); - return emitError(loc) << Twine("unsupported PlanRel type: ") + desc->name(); - } - - // Create new `PlanRelOp`. - auto planRelOp = builder.create(); - planRelOp.getBody().push_back(new Block()); - Block *block = &planRelOp.getBody().front(); - - // Handle `Rel` and `RelRoot` separately. - const Rel *rel; - if (message.has_rel()) - rel = &message.rel(); - else { - const RelRoot &root = message.root(); - rel = &root.input(); - - // Extract names. - SmallVector names(root.names().begin(), root.names().end()); - SmallVector nameAttrs(names.begin(), names.end()); - ArrayAttr namesAttr = builder.getStrArrayAttr(nameAttrs); - planRelOp.setFieldNamesAttr(namesAttr); - } - - // Import body of `PlanRelOp`. - OpBuilder::InsertionGuard insertGuard(builder); - builder.setInsertionPointToEnd(block); - mlir::FailureOr rootRel = importRel(builder, *rel); - if (failed(rootRel)) - return failure(); - - builder.setInsertionPointToEnd(block); - builder.create(rootRel.value()->getResult(0)); - - return planRelOp; -} - -static mlir::FailureOr importProjectRel(ImplicitLocOpBuilder builder, - const Rel &message) { - const ProjectRel &projectRel = message.project(); - - // Import input op. 
- const Rel &inputRel = projectRel.input(); - mlir::FailureOr inputOp = importRel(builder, inputRel); - if (failed(inputOp)) - return failure(); - - // Create `expressions` block. - auto conditionBlock = std::make_unique(); - auto inputTupleType = - cast(inputOp.value()->getResult(0).getType()); - conditionBlock->addArgument(inputTupleType, inputOp->getLoc()); - - // Fill `expressions` block with expression trees. - YieldOp yieldOp; - { - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(conditionBlock.get()); - - SmallVector values; - values.reserve(projectRel.expressions_size()); - for (const Expression &expression : projectRel.expressions()) { - // Import expression tree recursively. - FailureOr rootExprOp = - importExpression(builder, expression); - if (failed(rootExprOp)) - return failure(); - values.push_back(rootExprOp.value()->getResult(0)); - } - - // Create final `yield` op with root expression values. - yieldOp = builder.create(values); - } - - // Compute output type. - SmallVector resultFieldTypes; - resultFieldTypes.reserve(inputTupleType.size() + yieldOp->getNumOperands()); - append_range(resultFieldTypes, inputTupleType); - append_range(resultFieldTypes, yieldOp->getOperandTypes()); - auto resultType = TupleType::get(builder.getContext(), resultFieldTypes); - - // Create `project` op. 
- auto projectOp = - builder.create(resultType, inputOp.value()->getResult(0)); - projectOp.getExpressions().push_back(conditionBlock.release()); - - return projectOp; -} - -static mlir::FailureOr -importReadRel(ImplicitLocOpBuilder builder, const Rel &message) { - MLIRContext *context = builder.getContext(); - Location loc = UnknownLoc::get(context); - - const ReadRel &readRel = message.read(); - ReadRel::ReadTypeCase readType = readRel.read_type_case(); - switch (readType) { - case ReadRel::ReadTypeCase::kNamedTable: { - return importNamedTable(builder, message); - } - default: - const pb::FieldDescriptor *desc = - ReadRel::GetDescriptor()->FindFieldByNumber(readType); - return emitError(loc) << Twine("unsupported ReadRel type: ") + desc->name(); - } -} - -static mlir::FailureOr importRel(ImplicitLocOpBuilder builder, - const Rel &message) { - MLIRContext *context = builder.getContext(); - Location loc = UnknownLoc::get(context); - - // Import rel depending on its type. - Rel::RelTypeCase relType = message.rel_type_case(); - FailureOr maybeOp; - switch (relType) { - case Rel::RelTypeCase::kCross: - maybeOp = importCrossRel(builder, message); - break; - case Rel::RelTypeCase::kFilter: - maybeOp = importFilterRel(builder, message); - break; - case Rel::RelTypeCase::kProject: - maybeOp = importProjectRel(builder, message); - break; - case Rel::RelTypeCase::kRead: - maybeOp = importReadRel(builder, message); - break; - default: - const pb::FieldDescriptor *desc = - Rel::GetDescriptor()->FindFieldByNumber(relType); - return emitError(loc) << Twine("unsupported Rel type: ") + desc->name(); - } - if (failed(maybeOp)) - return failure(); - RelOpInterface op = maybeOp.value(); - - // Remainder: Import `emit` op if needed. - - // Extract `RelCommon` message. 
- FailureOr maybeRelCommon = - protobuf_utils::getCommon(message, loc); - if (failed(maybeRelCommon)) - return failure(); - const RelCommon *relCommon = maybeRelCommon.value(); - - // For the `direct` case, no further op needs to be created. - if (relCommon->has_direct()) - return op; - assert(relCommon->has_emit() && "expected either 'direct' or 'emit'"); - - // For the `emit` case, we need to insert an `EmitOp`. - const proto::RelCommon::Emit &emit = relCommon->emit(); - SmallVector mapping; - append_range(mapping, emit.output_mapping()); - ArrayAttr mappingAttr = builder.getI64ArrayAttr(mapping); - auto emitOp = builder.create(op->getResult(0), mappingAttr); - - return {emitOp}; -} - -static mlir::FailureOr -importScalarFunction(ImplicitLocOpBuilder builder, - const Expression::ScalarFunction &message) { - MLIRContext *context = builder.getContext(); - Location loc = UnknownLoc::get(context); - - // Import `output_type`. - const proto::Type &outputType = message.output_type(); - FailureOr mlirOutputType = importType(context, outputType); - if (failed(mlirOutputType)) - return failure(); - - // Import `arguments`. - SmallVector operands; - for (const FunctionArgument &arg : message.arguments()) { - // Error out on unsupported cases. - // TODO(ingomueller): Support other function argument types. - if (!arg.has_value()) { - const pb::FieldDescriptor *desc = - FunctionArgument::GetDescriptor()->FindFieldByNumber( - arg.arg_type_case()); - return emitError(loc) << Twine("unsupported arg type: ") + desc->name(); - } - - // Handle `value` case. - const Expression &value = arg.value(); - FailureOr expression = - importExpression(builder, value); - if (failed(expression)) - return failure(); - operands.push_back((*expression)->getResult(0)); - } - - // Import `function_reference` field. - int32_t anchor = message.function_reference(); - std::string calleeSymName = buildFuncSymName(anchor); - - // Create op. 
- auto callOp = - builder.create(mlirOutputType.value(), calleeSymName, operands); - - return {callOp}; -} - -} // namespace - -namespace mlir { -namespace substrait { - -OwningOpRef -translateProtobufToSubstrait(llvm::StringRef input, MLIRContext *context, - ImportExportOptions options) { - Location loc = UnknownLoc::get(context); - auto plan = std::make_unique(); - switch (options.serdeFormat) { - case substrait::SerdeFormat::kText: - if (!pb::TextFormat::ParseFromString(input.str(), plan.get())) { - emitError(loc) << "could not parse string as 'Plan' message."; - return {}; - } - break; - case substrait::SerdeFormat::kBinary: - if (!plan->ParseFromString(input.str())) { - emitError(loc) << "could not deserialize input as 'Plan' message."; - return {}; - } - break; - case substrait::SerdeFormat::kJson: - case substrait::SerdeFormat::kPrettyJson: { - pb::util::Status status = - pb::util::JsonStringToMessage(input.str(), plan.get()); - if (!status.ok()) { - emitError(loc) << "could not deserialize JSON as 'Plan' message:\n" - << status.message().as_string(); - return {}; - } - } - } - - context->loadDialect(); - - ImplicitLocOpBuilder builder(loc, context); - auto module = builder.create(loc); - auto moduleRef = OwningOpRef(module); - builder.setInsertionPointToEnd(&module.getBodyRegion().back()); - - if (failed(importPlan(builder, *plan))) - return {}; - - return moduleRef; -} - -} // namespace substrait -} // namespace mlir diff --git a/lib/Target/SubstraitPB/ProtobufUtils.cpp b/lib/Target/SubstraitPB/ProtobufUtils.cpp deleted file mode 100644 index 9df03127bbf1..000000000000 --- a/lib/Target/SubstraitPB/ProtobufUtils.cpp +++ /dev/null @@ -1,68 +0,0 @@ -//===-- ProtobufUtils.cpp - Utils for Substrait protobufs -------*- C++ -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ProtobufUtils.h" -#include "mlir/IR/Diagnostics.h" - -#include - -using namespace mlir; -using namespace ::substrait; -using namespace ::substrait::proto; - -namespace pb = google::protobuf; - -namespace mlir::substrait::protobuf_utils { - -template -static const RelCommon *getCommon(const RelType &rel) { - return &rel.common(); -} - -FailureOr getCommon(const Rel &rel, Location loc) { - Rel::RelTypeCase relType = rel.rel_type_case(); - switch (relType) { - case Rel::RelTypeCase::kCross: - return getCommon(rel.cross()); - case Rel::RelTypeCase::kFilter: - return getCommon(rel.filter()); - case Rel::RelTypeCase::kProject: - return getCommon(rel.project()); - case Rel::RelTypeCase::kRead: - return getCommon(rel.read()); - default: - const pb::FieldDescriptor *desc = - Rel::GetDescriptor()->FindFieldByNumber(relType); - return emitError(loc) << Twine("unsupported Rel type: ") + desc->name(); - } -} - -template -static RelCommon *getMutableCommon(RelType *rel) { - return rel->mutable_common(); -} - -FailureOr getMutableCommon(Rel *rel, Location loc) { - Rel::RelTypeCase relType = rel->rel_type_case(); - switch (relType) { - case Rel::RelTypeCase::kCross: - return getMutableCommon(rel->mutable_cross()); - case Rel::RelTypeCase::kFilter: - return getMutableCommon(rel->mutable_filter()); - case Rel::RelTypeCase::kProject: - return getMutableCommon(rel->mutable_project()); - case Rel::RelTypeCase::kRead: - return getMutableCommon(rel->mutable_read()); - default: - const pb::FieldDescriptor *desc = - Rel::GetDescriptor()->FindFieldByNumber(relType); - return emitError(loc) << Twine("unsupported Rel type: ") + desc->name(); - } -} - -} // namespace mlir::substrait::protobuf_utils diff --git a/lib/Target/SubstraitPB/ProtobufUtils.h b/lib/Target/SubstraitPB/ProtobufUtils.h deleted file mode 100644 index 
426d0c8eacd2..000000000000 --- a/lib/Target/SubstraitPB/ProtobufUtils.h +++ /dev/null @@ -1,33 +0,0 @@ -//===-- ProtobufUtils.h - Utils for Substrait protobufs ---------*- C++ -*-===// -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LIB_TARGET_SUBSTRAITPB_PROTOBUFUTILS_H -#define LIB_TARGET_SUBSTRAITPB_PROTOBUFUTILS_H - -#include "mlir/IR/Location.h" - -namespace substrait::proto { -class RelCommon; -class Rel; -} // namespace substrait::proto - -namespace mlir::substrait::protobuf_utils { - -/// Extract the `RelCommon` message from any possible `rel_type` message of the -/// given `rel`. Reports errors using the given `loc`. -FailureOr -getCommon(const ::substrait::proto::Rel &rel, Location loc); - -/// Extract the `RelCommon` message from any possible `rel_type` message of the -/// given `rel`. Reports errors using the given `loc`. 
-FailureOr<::substrait::proto::RelCommon *> -getMutableCommon(::substrait::proto::Rel *rel, Location loc); - -} // namespace mlir::substrait::protobuf_utils - -#endif // LIB_TARGET_SUBSTRAITPB_PROTOBUFUTILS_H diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 36f9f59a7016..7bc90cbf9539 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -20,15 +20,6 @@ declare_mlir_dialect_python_bindings( DIALECT_NAME iterators ) -declare_mlir_dialect_python_bindings( - ADD_TO_PARENT StructuredPythonSources.Dialects - ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir_structured" - TD_FILE dialects/SubstraitOps.td - SOURCES - dialects/substrait.py - DIALECT_NAME substrait -) - declare_mlir_dialect_python_bindings( ADD_TO_PARENT StructuredPythonSources.Dialects ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir_structured" diff --git a/python/StructuredDialects.cpp b/python/StructuredDialects.cpp index 6522a75b2f1b..f412f489ad65 100644 --- a/python/StructuredDialects.cpp +++ b/python/StructuredDialects.cpp @@ -60,105 +60,6 @@ PYBIND11_MODULE(_structuredDialects, mainModule) { py::arg("cls"), py::arg("element_type"), py::arg("context") = py::none()); - //===--------------------------------------------------------------------===// - // Substrait dialect. 
- //===--------------------------------------------------------------------===// - auto substraitModule = mainModule.def_submodule("substrait"); - - // - // Dialect - // - - substraitModule.def( - "register_dialect", - [](MlirContext context, bool doLoad) { - MlirDialectHandle handle = mlirGetDialectHandle__substrait__(); - mlirDialectHandleRegisterDialect(handle, context); - if (doLoad) - mlirDialectHandleLoadDialect(handle, context); - }, - py::arg("context") = py::none(), py::arg("load") = true); - - // - // Import - // - - static const auto importSubstraitPlan = [](const std::string &input, - MlirContext context, - MlirSubstraitSerdeFormat format) { - MlirStringRef mlirInput{/*data=*/input.data(), /*length=*/input.size()}; - MlirModule module = mlirSubstraitImportPlan(context, mlirInput, format); - if (mlirModuleIsNull(module)) - throw std::invalid_argument("Could not import Substrait plan"); - return module; - }; - - substraitModule.def( - "from_binpb", - [&](const std::string &input, MlirContext context) { - return importSubstraitPlan(input, context, - MlirSubstraitBinarySerdeFormat); - }, - py::arg("input") = py::none(), py::arg("context") = py::none(), - "Import a Substrait plan in the binary protobuf format"); - - substraitModule.def( - "from_textpb", - [&](const std::string &input, MlirContext context) { - return importSubstraitPlan(input, context, - MlirSubstraitTextSerdeFormat); - }, - py::arg("input") = py::none(), py::arg("context") = py::none(), - "Import a Substrait plan in the textual protobuf format"); - - substraitModule.def( - "from_json", - [&](const std::string &input, MlirContext context) { - return importSubstraitPlan(input, context, - MlirSubstraitJsonSerdeFormat); - }, - py::arg("input") = py::none(), py::arg("context") = py::none(), - "Import a Substrait plan in the JSON format"); - - // - // Export - // - - static const auto exportSubstraitPlan = [](MlirOperation op, - MlirSubstraitSerdeFormat format) { - MlirAttribute attr = 
mlirSubstraitExportPlan(op, format); - if (mlirAttributeIsNull(attr)) - throw std::invalid_argument("Could not export Substrait plan"); - MlirStringRef strRef = mlirStringAttrGetValue(attr); - std::string_view str(strRef.data, strRef.length); - return str; - }; - - substraitModule.def( - "to_binpb", - [&](MlirOperation op) { - return exportSubstraitPlan(op, MlirSubstraitBinarySerdeFormat); - }, - py::arg("op"), "Export a Substrait plan into the binary protobuf format"); - - substraitModule.def( - "to_textpb", - [&](MlirOperation op) { - return exportSubstraitPlan(op, MlirSubstraitTextSerdeFormat); - }, - py::arg("op"), - "Export a Substrait plan into the textual protobuf format"); - - substraitModule.def( - "to_json", - [&](MlirOperation op, bool pretty) { - auto format = pretty ? MlirSubstraitPrettyJsonSerdeFormat - : MlirSubstraitJsonSerdeFormat; - return exportSubstraitPlan(op, format); - }, - py::arg("op"), py::arg("pretty") = false, - "Export a Substrait plan into the JSON format"); - //===--------------------------------------------------------------------===// // Tabular dialect. //===--------------------------------------------------------------------===// diff --git a/python/mlir_structured/dialects/SubstraitOps.td b/python/mlir_structured/dialects/SubstraitOps.td deleted file mode 100644 index 7ce3825dcd66..000000000000 --- a/python/mlir_structured/dialects/SubstraitOps.td +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright 2024 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef PYTHON_BINDINGS_SUBSTRAIT_OPS -#define PYTHON_BINDINGS_SUBSTRAIT_OPS - -include "structured/Dialect/Substrait/IR/SubstraitOps.td" - -#endif // PYTHON_BINDINGS_SUBSTRAIT_OPS diff --git a/python/mlir_structured/dialects/substrait.py b/python/mlir_structured/dialects/substrait.py deleted file mode 100644 index 77d60455ff66..000000000000 --- a/python/mlir_structured/dialects/substrait.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2024 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from typing import Optional, Sequence - -from ._substrait_ops_gen import * -from ._substrait_ops_gen import _Dialect -from .._mlir_libs._structuredDialects.substrait import * - -try: - from .. import ir - from ._ods_common import ( - _cext as _ods_cext,) -except ImportError as e: - raise RuntimeError("Error loading imports from extension module") from e - - -@_ods_cext.register_operation(_Dialect, replace=True) -class PlanOp(PlanOp): - - def __init__(self, *args, version: Optional[Sequence[int]] = None, **kwargs): - if version is not None: - major, minor, patch = version - for part in ["major", "minor", "patch"]: - if (part + "_number") in kwargs: - raise ValueError( - "'version' and '(major|minor|patch)_number' are mutually exclusive" - ) - args = (major, minor, patch) + args - super().__init__(*args, **kwargs) - self.regions[0].blocks.append() - - @property - def body(self) -> ir.Block: - return self.regions[0].blocks[0] - - def to_json(self, pretty: bool = False) -> str: - return to_json(self.operation, pretty) - - def to_binpb(self) -> str: - return to_binpb(self.operation) - - def to_textpb(self) -> str: - return to_textpb(self.operation) - - -@_ods_cext.register_operation(_Dialect, replace=True) -class PlanRelOp(PlanRelOp): - - def __init__(self, *args, 
**kwargs): - super().__init__(*args, **kwargs) - self.regions[0].blocks.append() - - @property - def body(self) -> ir.Block: - return self.regions[0].blocks[0] diff --git a/requirements.txt b/requirements.txt index 1d6afcec58b3..58aa9c544eee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,13 +2,7 @@ -r third_party/llvm-project/mlir/python/requirements.txt # Testing. -datafusion==32.0.0 -duckdb -ibis==3.3.0 -ibis-framework==8.0.0 -ibis-substrait==3.2.0 lit -pyarrow # Plotting. pandas diff --git a/test/Dialect/Substrait/call.mlir b/test/Dialect/Substrait/call.mlir deleted file mode 100644 index 65186301388a..000000000000 --- a/test/Dialect/Substrait/call.mlir +++ /dev/null @@ -1,27 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: | FileCheck %s - -// CHECK-LABEL: substrait.plan -// CHECK: relation -// CHECK: named_table -// CHECK-NEXT: filter -// CHECK-NEXT: (%[[ARG0:.*]]: tuple) -// CHECK-NEXT: %[[V0:.*]] = field_reference %[[ARG0]] -// CHECK-NEXT: %[[V1:.*]] = call @function(%[[V0]]) : (si32) -> si1 -// CHECK-NEXT: yield -// CHECK-NEXT: } - -substrait.plan version 0 : 42 : 1 { - extension_uri @extension at "http://some.url/with/extensions.yml" - extension_function @function at @extension["somefunc"] - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - %2 = field_reference %arg[0] : tuple - %3 = call @function(%2) : (si32) -> si1 - yield %3 : si1 - } - yield %1 : tuple - } -} diff --git a/test/Dialect/Substrait/canonicalize.mlir b/test/Dialect/Substrait/canonicalize.mlir deleted file mode 100644 index 79ed18a8e725..000000000000 --- a/test/Dialect/Substrait/canonicalize.mlir +++ /dev/null @@ -1,93 +0,0 @@ -// RUN: structured-opt -split-input-file %s -canonicalize \ -// RUN: | FileCheck %s - -// Check that identiy mapping is folded. 
- -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: yield %[[V0]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [0, 1] from %0 : tuple -> tuple - yield %1 : tuple - } -} - -// ----- - -// Check that non-identiy mapping is not folded. - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit {{.*}} from %[[V0]] -// CHECK-NEXT: yield %[[V1]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 0] from %0 : tuple -> tuple - yield %1 : tuple - } -} - -// ----- - -// Check that identiy prefix is not folded. - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [0] from %[[V0]] -// CHECK-NEXT: yield %[[V1]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [0] from %0 : tuple -> tuple - yield %1 : tuple - } -} - -// ----- - -// Check that chains of `emit` ops are folded into one. - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: yield %[[V0]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 0] from %0 : tuple -> tuple - %2 = emit [1, 0] from %1 : tuple -> tuple - %3 = emit [0, 0, 1, 1] from %2 : tuple -> tuple - %4 = emit [3, 0, 1] from %3 : tuple -> tuple - %5 = emit [1, 0] from %4 : tuple -> tuple - yield %5 : tuple - } -} - -// ----- - -// Check that empty `project` folded. 
- -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: yield %[[V0]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = project %0 : tuple -> tuple { - ^bb0(%arg0: tuple): - } - yield %1 : tuple - } -} diff --git a/test/Dialect/Substrait/cross.mlir b/test/Dialect/Substrait/cross.mlir deleted file mode 100644 index 6e66025e316b..000000000000 --- a/test/Dialect/Substrait/cross.mlir +++ /dev/null @@ -1,19 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: | FileCheck %s - -// CHECK-LABEL: substrait.plan -// CHECK: relation -// CHECK: %[[V0:.*]] = named_table -// CHECK: %[[V1:.*]] = named_table -// CHECK-NEXT: %[[V2:.*]] = cross %[[V0]] x %[[V1]] -// CHECK-SAME: : tuple x tuple -// CHECK-NEXT: yield %[[V2]] : tuple - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = named_table @t2 as ["b"] : tuple - %2 = cross %0 x %1 : tuple x tuple - yield %2 : tuple - } -} diff --git a/test/Dialect/Substrait/emit-invalid.mlir b/test/Dialect/Substrait/emit-invalid.mlir deleted file mode 100644 index c1dc844fab88..000000000000 --- a/test/Dialect/Substrait/emit-invalid.mlir +++ /dev/null @@ -1,23 +0,0 @@ -// RUN: structured-opt -verify-diagnostics -split-input-file %s - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - // expected-error@+2 {{'substrait.emit' op failed to infer returned types}} - // expected-error@+1 {{1 is not a valid index into 'tuple'}} - %1 = emit [1] from %0 : tuple -> tuple - yield %1 : tuple - } -} - -// ----- - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - // expected-error@+2 {{'substrait.emit' op failed to infer returned types}} - // expected-error@+1 {{-1 is not a valid index into 'tuple'}} - %1 = emit [-1] from %0 : tuple -> tuple - yield %1 : tuple - } -} diff --git a/test/Dialect/Substrait/emit.mlir 
b/test/Dialect/Substrait/emit.mlir deleted file mode 100644 index 6689c3798b7b..000000000000 --- a/test/Dialect/Substrait/emit.mlir +++ /dev/null @@ -1,48 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: | FileCheck %s - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [1, 0] from %[[V0]] : -// CHECK-SAME: tuple -> tuple - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 0] from %0 : tuple -> tuple - yield %1 : tuple - } -} - -// ----- - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [0, 0] from %[[V0]] : -// CHECK-SAME: tuple -> tuple - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = emit [0, 0] from %0 : tuple -> tuple - yield %1 : tuple - } -} - -// ----- - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [1] from %[[V0]] : -// CHECK-SAME: tuple -> tuple - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1] from %0 : tuple -> tuple - yield %1 : tuple - } -} diff --git a/test/Dialect/Substrait/field-reference-invalid.mlir b/test/Dialect/Substrait/field-reference-invalid.mlir deleted file mode 100644 index f248566aa442..000000000000 --- a/test/Dialect/Substrait/field-reference-invalid.mlir +++ /dev/null @@ -1,33 +0,0 @@ -// RUN: structured-opt -verify-diagnostics -split-input-file %s - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - // expected-error@+2 {{can't extract element from type 'si32'}} - // expected-error@+1 {{mismatching position and type (position: array, type: 'tuple')}} - %2 = field_reference %arg[0, 0] : tuple - %3 = literal 0 : 
si1 - yield %3 : si1 - } - yield %1 : tuple - } -} - -// ----- - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - // expected-error@+2 {{2 is not a valid index for 'tuple'}} - // expected-error@+1 {{mismatching position and type (position: array, type: 'tuple')}} - %2 = field_reference %arg[2] : tuple - %3 = literal 0 : si1 - yield %3 : si1 - } - yield %1 : tuple - } -} diff --git a/test/Dialect/Substrait/field-reference.mlir b/test/Dialect/Substrait/field-reference.mlir deleted file mode 100644 index c3fb596513f6..000000000000 --- a/test/Dialect/Substrait/field-reference.mlir +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: | FileCheck %s - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: named_table -// CHECK-NEXT: filter -// CHECK-NEXT: (%[[ARG0:.*]]: tuple): -// CHECK-NEXT: %[[V0:.*]] = field_reference %[[ARG0]][0] : tuple -// CHECK-NEXT: yield %[[V0]] : si1 - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - %2 = field_reference %arg[0] : tuple - yield %2 : si1 - } - yield %1 : tuple - } -} - -// ----- - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: named_table -// CHECK-NEXT: filter -// CHECK-NEXT: (%[[ARG0:.*]]: tuple>): -// CHECK-NEXT: %[[V0:.*]] = field_reference %[[ARG0]][1, 0] : tuple> -// CHECK-NEXT: yield %[[V0]] : si1 - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b", "c"] : tuple> - %1 = filter %0 : tuple> { - ^bb0(%arg : tuple>): - %2 = field_reference %arg[1, 0] : tuple> - yield %2 : si1 - } - yield %1 : tuple> - } -} - -// ----- - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: named_table -// CHECK-NEXT: filter -// CHECK-NEXT: (%[[ARG0:.*]]: tuple>): -// CHECK-NEXT: %[[V0:.*]] = field_reference %[[ARG0]][1] : tuple> 
-// CHECK-NEXT: %[[V1:.*]] = field_reference %[[V0]][0] : tuple -// CHECK-NEXT: yield %[[V1]] : si1 - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b", "c"] : tuple> - %1 = filter %0 : tuple> { - ^bb0(%arg : tuple>): - %2 = field_reference %arg[1] : tuple> - %3 = field_reference %2[0] : tuple - yield %3 : si1 - } - yield %1 : tuple> - } -} diff --git a/test/Dialect/Substrait/filter-invalid.mlir b/test/Dialect/Substrait/filter-invalid.mlir deleted file mode 100644 index c1eaef881520..000000000000 --- a/test/Dialect/Substrait/filter-invalid.mlir +++ /dev/null @@ -1,58 +0,0 @@ -// RUN: structured-opt -verify-diagnostics -split-input-file %s - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - // expected-error@+1 {{'substrait.filter' op must have 'condition' region yielding one value (yields 2)}} - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - %2 = literal 0 : si1 - yield %2, %2 : si1, si1 - } - yield %1 : tuple - } -} - -// ----- - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - // expected-error@+1 {{'substrait.filter' op must have 'condition' region yielding 'si1' (yields 'si32')}} - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - %2 = literal 42 : si32 - yield %2 : si32 - } - yield %1 : tuple - } -} - -// ----- - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - // expected-error@+1 {{'substrait.filter' op must have 'condition' region taking 'tuple' as argument (takes no arguments)}} - %1 = filter %0 : tuple { - %2 = literal 0 : si1 - yield %2 : si1 - } - yield %1 : tuple - } -} - -// ----- - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - // expected-error@+1 {{'substrait.filter' op must have 'condition' region taking 'tuple' as argument (takes 'tuple<>')}} - %1 = filter %0 : tuple { - ^bb0(%arg : tuple<>): - %2 = literal 0 : si1 - yield %2 : si1 - } - 
yield %1 : tuple - } -} diff --git a/test/Dialect/Substrait/filter.mlir b/test/Dialect/Substrait/filter.mlir deleted file mode 100644 index 403fe06c6421..000000000000 --- a/test/Dialect/Substrait/filter.mlir +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: | FileCheck %s - -// CHECK-LABEL: substrait.plan -// CHECK: relation -// CHECK: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = filter %[[V0]] : tuple { -// CHECK-NEXT: ^[[BB0:.*]](%[[ARG0:.*]]: tuple): -// CHECK-NEXT: %[[V2:.*]] = literal -1 : si1 -// CHECK-NEXT: yield %[[V2]] : si1 -// CHECK-NEXT: } -// CHECK-NEXT: yield %[[V1]] : - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - %2 = literal -1 : si1 - yield %2 : si1 - } - yield %1 : tuple - } -} diff --git a/test/Dialect/Substrait/literal-invalid.mlir b/test/Dialect/Substrait/literal-invalid.mlir deleted file mode 100644 index 6bbafc7ec292..000000000000 --- a/test/Dialect/Substrait/literal-invalid.mlir +++ /dev/null @@ -1,5 +0,0 @@ -// RUN: structured-opt -verify-diagnostics -split-input-file %s - - -// expected-error@+1 {{unsuited attribute for literal value: unit}} -%0 = substrait.literal unit diff --git a/test/Dialect/Substrait/named-table-invalid.mlir b/test/Dialect/Substrait/named-table-invalid.mlir deleted file mode 100644 index a461f0f16d9d..000000000000 --- a/test/Dialect/Substrait/named-table-invalid.mlir +++ /dev/null @@ -1,36 +0,0 @@ -// RUN: structured-opt -verify-diagnostics -split-input-file %s - -// Test error if providing too many names (1 name for 0 fields). 
-substrait.plan version 0 : 42 : 1 { - relation { - // expected-error@+2 {{'substrait.named_table' op has mismatching 'field_names' (["a"]) and result type ('tuple<>')}} - // expected-note@+1 {{too many field names provided}} - %0 = named_table @t1 as ["a"] : tuple<> - yield %0 : tuple<> - } -} - -// ----- - -// Test error if providing too few names (0 names for 1 field). -substrait.plan version 0 : 42 : 1 { - relation { - // expected-error@+2 {{'substrait.named_table' op has mismatching 'field_names' ([]) and result type ('tuple')}} - // expected-error@+1 {{not enough field names provided}} - %0 = named_table @t1 as [] : tuple - yield %0 : tuple - } -} - - -// ----- - -// Test error if providing duplicate field names in the same nesting level. -substrait.plan version 0 : 42 : 1 { - relation { - // expected-error@+2 {{'substrait.named_table' op has mismatching 'field_names' (["a", "a"]) and result type ('tuple')}} - // expected-error@+1 {{duplicate field name: 'a'}} - %0 = named_table @t1 as ["a", "a"] : tuple - yield %0 : tuple - } -} diff --git a/test/Dialect/Substrait/named-table.mlir b/test/Dialect/Substrait/named-table.mlir deleted file mode 100644 index 91be01311238..000000000000 --- a/test/Dialect/Substrait/named-table.mlir +++ /dev/null @@ -1,67 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: | FileCheck %s - -// CHECK-LABEL: substrait.plan -// CHECK: relation -// CHECK: %[[V0:.*]] = named_table @t1 as [] : tuple<> -// CHECK-NEXT: yield %[[V0]] : -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as [] : tuple<> - yield %0 : tuple<> - } -} - -// ----- - -// CHECK-LABEL: substrait.plan -// CHECK: relation -// CHECK: %[[V0:.*]] = named_table @t1 as ["a"] : tuple -// CHECK-NEXT: yield %[[V0]] : -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - yield %0 : tuple - } -} - -// ----- - -// CHECK-LABEL: substrait.plan -// CHECK: relation -// CHECK: %[[V0:.*]] = named_table @t1 as ["a", 
"b"] : tuple -// CHECK-NEXT: yield %[[V0]] : -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - yield %0 : tuple - } -} - -// ----- - -// CHECK-LABEL: substrait.plan -// CHECK: relation -// CHECK: %[[V0:.*]] = named_table @t1 -// CHECK-SAME: as ["outer", "inner"] : tuple> -// CHECK-NEXT: yield %[[V0]] : -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["outer", "inner"] : tuple> - yield %0 : tuple> - } -} - -// ----- - -// CHECK-LABEL: substrait.plan -// CHECK: relation -// CHECK: %[[V0:.*]] = named_table @t1 -// CHECK-SAME: as ["a", "a"] : tuple> -// CHECK-NEXT: yield %[[V0]] : -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "a"] : tuple> - yield %0 : tuple> - } -} diff --git a/test/Dialect/Substrait/plan-invalid.mlir b/test/Dialect/Substrait/plan-invalid.mlir deleted file mode 100644 index b9dfd2cfd2ea..000000000000 --- a/test/Dialect/Substrait/plan-invalid.mlir +++ /dev/null @@ -1,42 +0,0 @@ -// RUN: structured-opt -verify-diagnostics -split-input-file %s - -// Test error if no symbol was found for `extension_function` op. -substrait.plan version 0 : 42 : 1 { - // expected-error@+1 {{'substrait.extension_function' op refers to @extension, which is not a valid 'uri' op}} - extension_function @function at @extension["somefunc"] -} - -// ----- - -// Test error if no symbol was found for `extension_type` op. -substrait.plan version 0 : 42 : 1 { - // expected-error@+1 {{'substrait.extension_type' op refers to @extension, which is not a valid 'uri' op}} - extension_type @type at @extension["sometype"] -} - -// ----- - -// Test error if no symbol was found for `extension_type_variation` op. 
-substrait.plan version 0 : 42 : 1 { - // expected-error@+1 {{'substrait.extension_type_variation' op refers to @extension, which is not a valid 'uri' op}} - extension_type_variation @type_var at @extension["sometypevar"] -} - -// ----- - -// Test error if symbol was in the wrong scope. -substrait.extension_uri @extension at "http://some.url/with/extensions.yml" -substrait.plan version 0 : 42 : 1 { - // expected-error@+1 {{'substrait.extension_function' op refers to @extension, which is not a valid 'uri' op}} - extension_function @function at @extension["somefunc"] -} - -// ----- - -// Test error if no symbol refers to an op of the wrong type. -substrait.plan version 0 : 42 : 1 { - extension_uri @extension at "http://some.url/with/extensions.yml" - extension_function @function.1 at @extension["somefunc"] - // expected-error@+1 {{'substrait.extension_function' op refers to @function.1, which is not a valid 'uri' op}} - extension_function @function.2 at @function.1["somefunc"] -} diff --git a/test/Dialect/Substrait/plan.mlir b/test/Dialect/Substrait/plan.mlir deleted file mode 100644 index 81960379e1c4..000000000000 --- a/test/Dialect/Substrait/plan.mlir +++ /dev/null @@ -1,84 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: | FileCheck %s - -// CHECK: substrait.plan version 0 : 42 : 1 -// CHECK-SAME: git_hash "hash" producer "producer" { -// CHECK-NEXT: } -substrait.plan - version 0 : 42 : 1 - git_hash "hash" - producer "producer" - {} - -// ----- - -// CHECK: substrait.plan version 0 : 42 : 1 { -// CHECK-NEXT: relation { -// CHECK-NEXT: named_table -// CHECK-NEXT: yield -// CHECK-NEXT: } -// CHECK-NEXT: } -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @foo::@bar as ["a", "b"] : tuple - yield %0 : tuple - } -} - -// ----- - -// CHECK: substrait.plan version 0 : 42 : 1 { -// CHECK-NEXT: relation { -// CHECK-NEXT: named_table -// CHECK-NEXT: yield -// CHECK-NEXT: } -// CHECK-NEXT: relation { -// CHECK-NEXT: named_table -// 
CHECK-NEXT: yield -// CHECK-NEXT: } -// CHECK-NEXT: } -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @foo::@bar as ["a", "b"] : tuple - yield %0 : tuple - } - relation { - %0 = named_table @foo::@bar as ["a", "b"] : tuple - yield %0 : tuple - } -} - -// ----- - -// CHECK: substrait.plan -// CHECK-NEXT: relation as ["x", "y", "z"] { -// CHECK-NEXT: named_table -// CHECK-NEXT: yield -// CHECK-NEXT: } -// CHECK-NEXT: } - -substrait.plan version 0 : 42 : 1 { - relation as ["x", "y", "z"] { - %0 = named_table @t as ["a", "b", "c"] : tuple> - yield %0 : tuple> - } -} - -// ----- - -// CHECK: substrait.plan version 0 : 42 : 1 { -// CHECK-NEXT: extension_uri @extension at "http://some.url/with/extensions.yml" -// CHECK-NEXT: extension_function @function at @extension["somefunc"] -// CHECK-NEXT: extension_type @type at @extension["sometype"] -// CHECK-NEXT: extension_type_variation @type_var at @extension["sometypevar"] -// CHECK-NEXT: extension_uri @other.extension at "http://other.url/with/more/extensions.yml" -// CHECK-NEXT: extension_function @other.function at @other.extension["someotherfunc"] -// CHECK-NEXT: } -substrait.plan version 0 : 42 : 1 { - extension_uri @extension at "http://some.url/with/extensions.yml" - extension_function @function at @extension["somefunc"] - extension_type @type at @extension["sometype"] - extension_type_variation @type_var at @extension["sometypevar"] - extension_uri @other.extension at "http://other.url/with/more/extensions.yml" - extension_function @other.function at @other.extension["someotherfunc"] -} diff --git a/test/Dialect/Substrait/project-invalid.mlir b/test/Dialect/Substrait/project-invalid.mlir deleted file mode 100644 index 1166ecdb793f..000000000000 --- a/test/Dialect/Substrait/project-invalid.mlir +++ /dev/null @@ -1,58 +0,0 @@ -// RUN: structured-opt -verify-diagnostics -split-input-file %s - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - // 
expected-error@+1 {{'substrait.project' op has output field type whose prefix is different from input field types ('si32' vs 'si1')}} - %1 = project %0 : tuple -> tuple { - ^bb0(%arg : tuple): - %42 = literal 42 : si32 - yield %42 : si32 - } - yield %1 : tuple - } -} - -// ----- -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - // expected-error@+1 {{'substrait.project' op has output field type whose prefix is different from input field types ('si32', 'si32' vs 'si32')}} - %1 = project %0 : tuple -> tuple { - ^bb0(%arg : tuple): - %42 = literal 42 : si32 - yield %42 : si32 - } - yield %1 : tuple - } -} - -// ----- - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - // expected-error@+1 {{'substrait.project' op has output field type whose new fields are different from the yielded operand types ('si1' vs 'si32')}} - %1 = project %0 : tuple -> tuple { - ^bb0(%arg : tuple): - %42 = literal 42 : si32 - yield %42 : si32 - } - yield %1 : tuple - } -} - -// ----- - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - // expected-error@+1 {{'substrait.project' op has 'expressions' region with mismatching argument type (has: 'tuple', expected: 'tuple')}} - %1 = project %0 : tuple -> tuple { - ^bb0(%arg : tuple): - %3 = field_reference %arg[0] : tuple - yield %3 : si1 - } - yield %1 : tuple - } -} diff --git a/test/Dialect/Substrait/project.mlir b/test/Dialect/Substrait/project.mlir deleted file mode 100644 index 02c8fff403a1..000000000000 --- a/test/Dialect/Substrait/project.mlir +++ /dev/null @@ -1,46 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: | FileCheck %s - -// CHECK: substrait.plan version 0 : 42 : 1 { -// CHECK-NEXT: relation -// CHECK: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = project %[[V0]] : tuple -> tuple { -// CHECK-NEXT: ^[[BB0:.*]](%[[ARG0:.*]]: tuple): -// CHECK-NEXT: %[[V2:.*]] = literal -1 : si1 -// 
CHECK-NEXT: %[[V3:.*]] = literal 42 : si32 -// CHECK-NEXT: yield %[[V2]], %[[V3]] : si1, si32 -// CHECK-NEXT: } -// CHECK-NEXT: yield %[[V1]] : - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = project %0 : tuple -> tuple { - ^bb0(%arg : tuple): - %true = literal -1 : si1 - %42 = literal 42 : si32 - yield %true, %42 : si1, si32 - } - yield %1 : tuple - } -} - -// ----- - -// CHECK: substrait.plan version 0 : 42 : 1 { -// CHECK-NEXT: relation -// CHECK: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = project %[[V0]] : tuple -> tuple { -// CHECK-NEXT: ^[[BB0:.*]](%[[ARG0:.*]]: tuple): -// CHECK-NEXT: } - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = project %0 : tuple -> tuple { - ^bb0(%arg0: tuple): - yield - } - yield %1 : tuple - } -} diff --git a/test/Dialect/Substrait/relation-invalid.mlir b/test/Dialect/Substrait/relation-invalid.mlir deleted file mode 100644 index 1118ba861e79..000000000000 --- a/test/Dialect/Substrait/relation-invalid.mlir +++ /dev/null @@ -1,47 +0,0 @@ -// RUN: structured-opt -verify-diagnostics -split-input-file %s - -// Test error if providing too many names (1 name for 0 fields). -substrait.plan version 0 : 42 : 1 { - // expected-error@+2 {{'substrait.relation' op has mismatching 'field_names' (["x", "y"]) and result type ('tuple')}} - // expected-note@+1 {{too many field names provided}} - relation as ["x", "y"] { - %0 = named_table @t1 as ["a"] : tuple - yield %0 : tuple - } -} - -// ----- - -// Test error if providing too few names (0 names for 1 field). 
-substrait.plan version 0 : 42 : 1 { - // expected-error@+2 {{'substrait.relation' op has mismatching 'field_names' (["x"]) and result type ('tuple')}} - // expected-error@+1 {{not enough field names provided}} - relation as ["x"] { - %0 = named_table @t1 as ["a", "b"] : tuple - yield %0 : tuple - } -} - - -// ----- - -// Test error if providing duplicate field names in the same nesting level. -substrait.plan version 0 : 42 : 1 { - // expected-error@+2 {{'substrait.relation' op has mismatching 'field_names' (["x", "x"]) and result type ('tuple')}} - // expected-error@+1 {{duplicate field name: 'x'}} - relation as ["x", "x"] { - %0 = named_table @t1 as ["a", "b"] : tuple - yield %0 : tuple - } -} - -// ----- - -// Test error on wrong number of yielded values. -substrait.plan version 0 : 42 : 1 { - // expected-error@+1 {{'substrait.relation' op must have 'body' region yielding one value (yields 2)}} - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - yield %0, %0 : tuple, tuple - } -} diff --git a/test/Target/SubstraitPB/Export/call.mlir b/test/Target/SubstraitPB/Export/call.mlir deleted file mode 100644 index 0917085f5616..000000000000 --- a/test/Target/SubstraitPB/Export/call.mlir +++ /dev/null @@ -1,51 +0,0 @@ -// RUN: structured-translate -substrait-to-protobuf --split-input-file %s \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: --split-input-file="# -----" --output-split-marker="// ""-----" \ -// RUN: | structured-translate -substrait-to-protobuf \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | FileCheck %s - -// CHECK: extension_uris { -// CHECK-NEXT: uri: "http://some.url/with/extensions.yml" -// CHECK-NEXT: } -// CHECK-NEXT: extensions { -// CHECK-NEXT: extension_function { -// CHECK-NEXT: name: "somefunc" -// CHECK-NEXT: } -// CHECK: extensions { -// 
CHECK-NEXT: extension_function { -// CHECK-NEXT: function_anchor: 1 -// CHECK-NEXT: name: "somefunc" -// CHECK: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: filter { -// CHECK-NOT: condition -// CHECK: condition { -// CHECK-NEXT: scalar_function { -// CHECK-NEXT: function_reference: 1 -// CHECK-NEXT: output_type { -// CHECK-NEXT: bool { -// CHECK-NEXT: nullability: NULLABILITY_REQUIRED -// CHECK: arguments { -// CHECK-NEXT: value { -// CHECK-NEXT: selection { - -substrait.plan version 0 : 42 : 1 { - extension_uri @extension at "http://some.url/with/extensions.yml" - extension_function @f1 at @extension["somefunc"] - extension_function @f2 at @extension["somefunc"] - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - %2 = field_reference %arg[0] : tuple - %3 = call @f2(%2) : (si32) -> si1 - yield %3 : si1 - } - yield %1 : tuple - } -} diff --git a/test/Target/SubstraitPB/Export/cross.mlir b/test/Target/SubstraitPB/Export/cross.mlir deleted file mode 100644 index 71b48e0d2c78..000000000000 --- a/test/Target/SubstraitPB/Export/cross.mlir +++ /dev/null @@ -1,28 +0,0 @@ -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: | structured-translate -substrait-to-protobuf \ -// RUN: | FileCheck %s - -// CHECK-LABEL: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: cross { -// CHECK-NEXT: common { -// CHECK-NEXT: direct { -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: left { -// CHECK-NEXT: read { -// CHECK: right { -// CHECK-NEXT: read { - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = named_table @t2 as ["b"] : tuple - %2 = cross %0 x %1 : tuple x tuple - yield %2 : tuple - } -} diff --git a/test/Target/SubstraitPB/Export/emit-invalid.mlir b/test/Target/SubstraitPB/Export/emit-invalid.mlir deleted file mode 
100644 index 720869a757f5..000000000000 --- a/test/Target/SubstraitPB/Export/emit-invalid.mlir +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: structured-translate -verify-diagnostics -split-input-file %s \ -// RUN: -substrait-to-protobuf - -// Two subsequent `emit` ops: the export can't deal with that. - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - // expected-note@+1 {{op exported to 'input' message}} - %1 = emit [1, 0] from %0 : tuple -> tuple - // expected-error@+1 {{'substrait.emit' op has 'input' that already has 'emit' message (try running canonicalization?)}} - %2 = emit [1, 0] from %1 : tuple -> tuple - yield %2 : tuple - } -} diff --git a/test/Target/SubstraitPB/Export/emit.mlir b/test/Target/SubstraitPB/Export/emit.mlir deleted file mode 100644 index 7defc92f9f30..000000000000 --- a/test/Target/SubstraitPB/Export/emit.mlir +++ /dev/null @@ -1,79 +0,0 @@ -// RUN: structured-translate -substrait-to-protobuf --split-input-file %s \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: --split-input-file="# -----" --output-split-marker="// ""-----" \ -// RUN: | structured-translate -substrait-to-protobuf \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | FileCheck %s - -// Checks that the `emit` field of a `crosss` is exported correctly. - -// CHECK-LABEL: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: cross { -// CHECK-NEXT: common { -// CHECK-NEXT: emit { -// CHECK-NEXT: output_mapping: 1 -// CHECK-NEXT: output_mapping: 0 -// CHECK-NEXT: } - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = cross %0 x %0 : tuple x tuple - %2 = emit [1, 0] from %1 : tuple -> tuple - yield %2 : tuple - } -} - -// ----- - -// Checks that the `emit` field of a `named_table` is exported correctly. 
- -// CHECK-LABEL: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: read { -// CHECK-NEXT: common { -// CHECK-NEXT: emit { -// CHECK-NEXT: output_mapping: 1 -// CHECK-NEXT: } - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1] from %0 : tuple -> tuple - yield %1 : tuple - } -} - -// ----- - -// Checks that the `emit` field of a `named_table` is exported correctly. - -// CHECK-LABEL: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: filter { -// CHECK-NEXT: common { -// CHECK-NEXT: emit { -// CHECK-NEXT: output_mapping: 1 -// CHECK-NEXT: } -// CHECK-LABEL: input { -// CHECK-NEXT: read { -// CHECK-NEXT: common { -// CHECK-NEXT: direct - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - %2 = literal -1 : si1 - yield %2 : si1 - } - %2 = emit [1] from %1 : tuple -> tuple - yield %2 : tuple - } -} diff --git a/test/Target/SubstraitPB/Export/field-reference.mlir b/test/Target/SubstraitPB/Export/field-reference.mlir deleted file mode 100644 index d35daceff83d..000000000000 --- a/test/Target/SubstraitPB/Export/field-reference.mlir +++ /dev/null @@ -1,76 +0,0 @@ -// RUN: structured-translate -substrait-to-protobuf --split-input-file %s \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: --split-input-file="# -----" --output-split-marker="// ""-----" \ -// RUN: | structured-translate -substrait-to-protobuf \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | FileCheck %s - -// CHECK-LABEL: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: filter { -// CHECK-NEXT: common { -// CHECK-NEXT: direct { -// CHECK: input { -// CHECK: condition { -// CHECK-NEXT: selection { -// CHECK-NEXT: direct_reference { -// CHECK-NEXT: struct_field { -// 
CHECK-NEXT: field: 1 -// CHECK-NEXT: child { -// CHECK-NEXT: struct_field { -// CHECK: root_reference { -// CHECK-NEXT: } - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b", "c"] : tuple> - %1 = filter %0 : tuple> { - ^bb0(%arg : tuple>): - %2 = field_reference %arg[1, 0] : tuple> - yield %2 : si1 - } - yield %1 : tuple> - } -} - -// ----- - -// CHECK-LABEL: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: filter { -// CHECK-NEXT: common { -// CHECK-NEXT: direct { -// CHECK: input { -// CHECK: condition { -// CHECK-NEXT: selection { -// CHECK-NEXT: direct_reference { -// CHECK-NEXT: struct_field { -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: expression { -// CHECK-NEXT: selection { -// CHECK-NEXT: direct_reference { -// CHECK-NEXT: struct_field { -// CHECK-NEXT: field: 1 -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: root_reference { -// CHECK-NEXT: } -// CHECK-NEXT: } - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b", "c"] : tuple> - %1 = filter %0 : tuple> { - ^bb0(%arg : tuple>): - %2 = field_reference %arg[1] : tuple> - %3 = field_reference %2[0] : tuple - yield %3 : si1 - } - yield %1 : tuple> - } -} diff --git a/test/Target/SubstraitPB/Export/filter.mlir b/test/Target/SubstraitPB/Export/filter.mlir deleted file mode 100644 index 289696af25b8..000000000000 --- a/test/Target/SubstraitPB/Export/filter.mlir +++ /dev/null @@ -1,29 +0,0 @@ -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: | structured-translate -substrait-to-protobuf \ -// RUN: | FileCheck %s - -// CHECK-LABEL: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: filter { -// CHECK-NEXT: common { -// CHECK-NEXT: direct { -// CHECK: input { -// CHECK: condition { -// CHECK-NEXT: literal { -// CHECK-NEXT: boolean: true - -substrait.plan version 0 : 42 : 1 { - 
relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = filter %0 : tuple { - ^bb0(%arg : tuple): - %2 = literal -1 : si1 - yield %2 : si1 - } - yield %1 : tuple - } -} diff --git a/test/Target/SubstraitPB/Export/plan.mlir b/test/Target/SubstraitPB/Export/plan.mlir deleted file mode 100644 index a2f929aefb28..000000000000 --- a/test/Target/SubstraitPB/Export/plan.mlir +++ /dev/null @@ -1,127 +0,0 @@ -// RUN: structured-translate -substrait-to-protobuf --split-input-file %s \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: --split-input-file="# -----" --output-split-marker="// ""-----" \ -// RUN: | structured-translate -substrait-to-protobuf \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | FileCheck %s - -// CHECK-LABEL: version { -// CHECK-DAG: minor_number: 42 -// CHECK-DAG: patch_number: 1 -// CHECK-DAG: git_hash: "hash" -// CHECK-DAG: producer: "producer" -// CHECK-NEXT: } -substrait.plan - version 0 : 42 : 1 - git_hash "hash" - producer "producer" - {} - -// ----- - -// CHECK: relations { -// CHECK-NEXT: root { -// CHECK-NEXT: input { -// CHECK-NEXT: read { -// CHECK: named_table { -// CHECK-NEXT: names -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: names: "x" -// CHECK-NEXT: names: "y" -// CHECK-NEXT: names: "z" -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: version - -substrait.plan version 0 : 42 : 1 { - relation as ["x", "y", "z"] { - %0 = named_table @t as ["a", "b", "c"] : tuple> - yield %0 : tuple> - } -} - -// ----- - -// CHECK: extension_uris { -// CHECK-NEXT: extension_uri_anchor: 1 -// CHECK-NEXT: uri: "http://url.1/with/extensions.yml" -// CHECK: extension_uris { -// CHECK-NEXT: extension_uri_anchor: 2 -// CHECK-NEXT: uri: "http://url.2/with/extensions.yml" -// CHECK: extension_uris { -// CHECK-NEXT: extension_uri_anchor: 42 -// 
CHECK-NEXT: uri: "http://url.42/with/extensions.yml" -// CHECK: extension_uris { -// CHECK-NEXT: uri: "http://some.url/with/extensions.yml" -// CHECK: extension_uris { -// CHECK-NEXT: extension_uri_anchor: 3 -// CHECK-NEXT: uri: "http://url.foo/with/extensions.yml" -// CHECK: extension_uris { -// CHECK-NEXT: extension_uri_anchor: 4 -// CHECK-NEXT: uri: "http://url.bar/with/extensions.yml" -// CHECK: extensions { -// CHECK-NEXT: extension_function { -// CHECK-NEXT: extension_uri_reference: 1 -// CHECK-NEXT: function_anchor: 1 -// CHECK-NEXT: name: "func1" -// CHECK: extensions { -// CHECK-NEXT: extension_function { -// CHECK-NEXT: extension_uri_reference: 42 -// CHECK-NEXT: function_anchor: 42 -// CHECK-NEXT: name: "func42" -// CHECK: extensions { -// CHECK-NEXT: extension_type { -// CHECK-NEXT: extension_uri_reference: 2 -// CHECK-NEXT: type_anchor: 1 -// CHECK-NEXT: name: "type1" -// CHECK: extensions { -// CHECK-NEXT: extension_type { -// CHECK-NEXT: extension_uri_reference: 2 -// CHECK-NEXT: type_anchor: 42 -// CHECK-NEXT: name: "type42" -// CHECK: extensions { -// CHECK-NEXT: extension_type_variation { -// CHECK-NEXT: extension_uri_reference: 1 -// CHECK-NEXT: type_variation_anchor: 1 -// CHECK-NEXT: name: "typevar1" -// CHECK: extensions { -// CHECK-NEXT: extension_type_variation { -// CHECK-NEXT: extension_uri_reference: 1 -// CHECK-NEXT: type_variation_anchor: 42 -// CHECK-NEXT: name: "typevar2" - -substrait.plan version 0 : 42 : 1 { - extension_uri @extension_uri.1 at "http://url.1/with/extensions.yml" - extension_uri @extension_uri.2 at "http://url.2/with/extensions.yml" - extension_uri @extension_uri.42 at "http://url.42/with/extensions.yml" - extension_uri @extension at "http://some.url/with/extensions.yml" - extension_uri @extension_uri.foo at "http://url.foo/with/extensions.yml" - extension_uri @extension_uri.bar at "http://url.bar/with/extensions.yml" - extension_function @extension_function.1 at @extension_uri.1["func1"] - extension_function 
@extension_function.42 at @extension_uri.42["func42"] - extension_type @extension_type.1 at @extension_uri.2["type1"] - extension_type @extension_type.42 at @extension_uri.2["type42"] - extension_type_variation @extension_type_variation.1 at @extension_uri.1["typevar1"] - extension_type_variation @extension_type_variation.42 at @extension_uri.1["typevar2"] -} - -// ----- - - -// CHECK: extension_uris { -// CHECK-NEXT: uri: "http://some.url/with/extensions.yml" -// CHECK: extension_uris { -// CHECK-NEXT: extension_uri_anchor: 1 -// CHECK-NEXT: uri: "http://other.url/with/more/extensions.yml" - -substrait.plan version 0 : 42 : 1 { - extension_uri @extension at "http://some.url/with/extensions.yml" - // If not handled carefully, parsing this symbol into an anchor may clash. - extension_uri @extension_uri.0 at "http://other.url/with/more/extensions.yml" -} diff --git a/test/Target/SubstraitPB/Export/project.mlir b/test/Target/SubstraitPB/Export/project.mlir deleted file mode 100644 index 5dd727215bd0..000000000000 --- a/test/Target/SubstraitPB/Export/project.mlir +++ /dev/null @@ -1,26 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: | FileCheck %s - -// CHECK: substrait.plan version 0 : 42 : 1 { -// CHECK-NEXT: relation -// CHECK: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = project %[[V0]] : tuple -> tuple { -// CHECK-NEXT: ^[[BB0:.*]](%[[ARG0:.*]]: tuple): -// CHECK-NEXT: %[[V2:.*]] = literal -1 : si1 -// CHECK-NEXT: %[[V3:.*]] = literal 42 : si32 -// CHECK-NEXT: yield %[[V2]], %[[V3]] : si1, si32 -// CHECK-NEXT: } -// CHECK-NEXT: yield %[[V1]] : - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = project %0 : tuple -> tuple { - ^bb0(%arg : tuple): - %true = literal -1 : si1 - %42 = literal 42 : si32 - yield %true, %42 : si1, si32 - } - yield %1 : tuple - } -} diff --git a/test/Target/SubstraitPB/Export/types.mlir b/test/Target/SubstraitPB/Export/types.mlir deleted file mode 100644 index 
4c40f5115a53..000000000000 --- a/test/Target/SubstraitPB/Export/types.mlir +++ /dev/null @@ -1,69 +0,0 @@ -// RUN: structured-translate -substrait-to-protobuf --split-input-file %s \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: --split-input-file="# -----" --output-split-marker="// ""-----" \ -// RUN: | structured-translate -substrait-to-protobuf \ -// RUN: --split-input-file --output-split-marker="# -----" \ -// RUN: | FileCheck %s - -// CHECK-LABEL: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: read { -// CHECK: base_schema { -// CHECK-NEXT: names: "a" -// CHECK-NEXT: struct { -// CHECK-NEXT: types { -// CHECK-NEXT: i32 { -// CHECK-NEXT: nullability: NULLABILITY_REQUIRED -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: nullability: NULLABILITY_REQUIRED -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: named_table { - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - yield %0 : tuple - } -} - -// ----- - -// CHECK-LABEL: relations { -// CHECK-NEXT: rel { -// CHECK-NEXT: read { -// CHECK: base_schema { -// CHECK-NEXT: names: "a" -// CHECK-NEXT: names: "b" -// CHECK-NEXT: names: "c" -// CHECK-NEXT: struct { -// CHECK-NEXT: types { -// CHECK-NEXT: bool { -// CHECK-NEXT: nullability: NULLABILITY_REQUIRED -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: types { -// CHECK-NEXT: struct { -// CHECK-NEXT: types { -// CHECK-NEXT: bool { -// CHECK-NEXT: nullability: NULLABILITY_REQUIRED -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: nullability: NULLABILITY_REQUIRED -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: named_table { - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b", "c"] : tuple> - yield %0 : tuple> - } -} diff --git a/test/Target/SubstraitPB/Import/call.textpb 
b/test/Target/SubstraitPB/Import/call.textpb deleted file mode 100644 index 310ee556e23f..000000000000 --- a/test/Target/SubstraitPB/Import/call.textpb +++ /dev/null @@ -1,96 +0,0 @@ -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" \ -# RUN: | FileCheck %s - -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | structured-translate -substrait-to-protobuf \ -# RUN: --split-input-file --output-split-marker="# ""-----" \ -# RUN: | structured-translate -protobuf-to-substrait \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | FileCheck %s - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: extension_uri @[[URI:.*]] at "http://some.url/with/extensions.yml" -# CHECK-NEXT: extension_function @[[F1:.*]] at @[[URI]]["somefunc"] -# CHECK-NEXT: extension_function @[[F2:.*]] at @[[URI]]["somefunc"] -# CHECK-NEXT: relation -# CHECK-NEXT: named_table -# CHECK-NEXT: filter -# CHECK-NEXT: (%[[V0:.*]]: tuple): -# CHECK-NEXT: %[[V1:.*]] = field_reference %[[V0]][0] : tuple -# CHECK-NEXT: %[[V2:.*]] = call @[[F2]](%[[V1]]) : (si32) -> si1 -# CHECK-NEXT: yield %[[V2]] : si1 - -extension_uris { - uri: "http://some.url/with/extensions.yml" -} -extensions { - extension_function { - name: "somefunc" - } -} -extensions { - extension_function { - function_anchor: 1 - name: "somefunc" - } -} -relations { - rel { - filter { - common { - direct { - } - } - input { - read { - common { - direct { - } - } - base_schema { - names: "a" - struct { - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } - condition { - scalar_function { - function_reference: 1 - output_type { - bool { - nullability: NULLABILITY_REQUIRED - } - } - arguments { - value { - selection { - direct_reference { - struct_field { - } - } - root_reference { - } - } - } - } - } 
- } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} diff --git a/test/Target/SubstraitPB/Import/cross.textpb b/test/Target/SubstraitPB/Import/cross.textpb deleted file mode 100644 index 6d1e3c768d03..000000000000 --- a/test/Target/SubstraitPB/Import/cross.textpb +++ /dev/null @@ -1,74 +0,0 @@ -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: | FileCheck %s - -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: | structured-translate -substrait-to-protobuf \ -# RUN: | structured-translate -protobuf-to-substrait \ -# RUN: | FileCheck %s - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: relation { -# CHECK-NEXT: %[[V0:.*]] = named_table -# CHECK-NEXT: %[[V1:.*]] = named_table -# CHECK-NEXT: %[[V2:.*]] = cross %[[V0]] x %[[V1]] -# CHECK-SAME: : tuple x tuple -# CHECK-NEXT: yield %[[V2]] : tuple - -relations { - rel { - cross { - common { - direct { - } - } - left { - read { - common { - direct { - } - } - base_schema { - names: "a" - struct { - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } - right { - read { - common { - direct { - } - } - base_schema { - names: "b" - struct { - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t2" - } - } - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} diff --git a/test/Target/SubstraitPB/Import/emit.textpb b/test/Target/SubstraitPB/Import/emit.textpb deleted file mode 100644 index 30bf2104d0e0..000000000000 --- a/test/Target/SubstraitPB/Import/emit.textpb +++ /dev/null @@ -1,137 +0,0 @@ -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" \ -# RUN: | FileCheck %s - -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | structured-translate -substrait-to-protobuf \ -# RUN: 
--split-input-file --output-split-marker="# ""-----" \ -# RUN: | structured-translate -protobuf-to-substrait \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | FileCheck %s - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: relation -# CHECK-NEXT: %[[V0:.*]] = named_table -# CHECK-NEXT: %[[V1:.*]] = emit [1, 0] from %[[V0]] -# CHECK-NEXT: yield %[[V1]] - -relations { - rel { - read { - common { - emit { - output_mapping: 1 - output_mapping: 0 - } - } - base_schema { - names: "a" - names: "b" - struct { - types { - bool { - nullability: NULLABILITY_REQUIRED - } - } - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} - -# ----- - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: relation -# CHECK-NEXT: %[[V0:.*]] = named_table -# CHECK-NEXT: %[[V1:.*]] = emit [0, 0] from %[[V0]] -# CHECK-NEXT: yield %[[V1]] - -relations { - rel { - read { - common { - emit { - output_mapping: 0 - output_mapping: 0 - } - } - base_schema { - names: "a" - struct { - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} - -# ----- - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: relation -# CHECK-NEXT: %[[V0:.*]] = named_table -# CHECK-NEXT: %[[V1:.*]] = emit [1] from %[[V0]] -# CHECK-NEXT: yield %[[V1]] - -relations { - rel { - read { - common { - emit { - output_mapping: 1 - } - } - base_schema { - names: "a" - names: "b" - struct { - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - types { - bool { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} diff --git a/test/Target/SubstraitPB/Import/field-reference.textpb 
b/test/Target/SubstraitPB/Import/field-reference.textpb deleted file mode 100644 index f6c176a7ca41..000000000000 --- a/test/Target/SubstraitPB/Import/field-reference.textpb +++ /dev/null @@ -1,163 +0,0 @@ -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" \ -# RUN: | FileCheck %s - -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | structured-translate -substrait-to-protobuf \ -# RUN: --split-input-file --output-split-marker="# ""-----" \ -# RUN: | structured-translate -protobuf-to-substrait \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | FileCheck %s - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: relation -# CHECK-NEXT: named_table -# CHECK-NEXT: filter -# CHECK-NEXT: (%[[ARG0:.*]]: tuple>) -# CHECK-NEXT: %[[V0:.*]] = field_reference %[[ARG0]][1, 0] -# CHECK-SAME: : tuple> -# CHECK-NEXT: yield %[[V0]] : si1 - -relations { - rel { - filter { - common { - direct { - } - } - input { - read { - common { - direct { - } - } - base_schema { - names: "a" - names: "b" - names: "c" - struct { - types { - bool { - nullability: NULLABILITY_REQUIRED - } - } - types { - struct { - types { - bool { - nullability: NULLABILITY_REQUIRED - } - } - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } - condition { - selection { - direct_reference { - struct_field { - field: 1 - child { - struct_field { - } - } - } - } - root_reference { - } - } - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} - -# ----- - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: relation -# CHECK-NEXT: named_table -# CHECK-NEXT: filter -# CHECK-NEXT: (%[[ARG0:.*]]: tuple>) -# CHECK-NEXT: %[[V0:.*]] = field_reference %[[ARG0]][1] -# CHECK-SAME: : tuple> -# CHECK-NEXT: %[[V1:.*]] = field_reference %[[V0]][0] -# CHECK-SAME: : tuple -# CHECK-NEXT: yield %[[V1]] : si1 - -relations { - rel { 
- filter { - common { - direct { - } - } - input { - read { - common { - direct { - } - } - base_schema { - names: "a" - names: "b" - names: "c" - struct { - types { - bool { - nullability: NULLABILITY_REQUIRED - } - } - types { - struct { - types { - bool { - nullability: NULLABILITY_REQUIRED - } - } - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } - condition { - selection { - direct_reference { - struct_field { - } - } - expression { - selection { - direct_reference { - struct_field { - field: 1 - } - } - root_reference { - } - } - } - } - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} diff --git a/test/Target/SubstraitPB/Import/filter.textpb b/test/Target/SubstraitPB/Import/filter.textpb deleted file mode 100644 index 38b6bc71715e..000000000000 --- a/test/Target/SubstraitPB/Import/filter.textpb +++ /dev/null @@ -1,59 +0,0 @@ -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: | FileCheck %s - -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: | structured-translate -substrait-to-protobuf \ -# RUN: | structured-translate -protobuf-to-substrait \ -# RUN: | FileCheck %s - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: relation { -# CHECK-NEXT: %[[V0:.*]] = named_table -# CHECK-NEXT: %[[V1:.*]] = filter %[[V0]] : tuple -# CHECK-NEXT: ^bb0(%[[ARG0:.*]]: tuple): -# CHECK-NEXT: %[[V2:.*]] = literal -1 : si1 -# CHECK-NEXT: yield %[[V2]] : si1 -# CHECK-NEXT: } -# CHECK-NEXT: yield %[[V1]] : tuple - -relations { - rel { - filter { - common { - direct { - } - } - input { - read { - common { - direct { - } - } - base_schema { - names: "a" - struct { - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } - condition { - literal { - boolean: true - } - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} diff --git a/test/Target/SubstraitPB/Import/plan.textpb 
b/test/Target/SubstraitPB/Import/plan.textpb deleted file mode 100644 index c44c95b20af1..000000000000 --- a/test/Target/SubstraitPB/Import/plan.textpb +++ /dev/null @@ -1,149 +0,0 @@ -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" \ -# RUN: | FileCheck %s - -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | structured-translate -substrait-to-protobuf \ -# RUN: --split-input-file --output-split-marker="# ""-----" \ -# RUN: | structured-translate -protobuf-to-substrait \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | FileCheck %s - -# CHECK-LABEL: substrait.plan version 0 : 42 : 1 -# CHECK-SAME: git_hash "hash" producer "producer" { -# CHECK-NEXT: } -version { - minor_number: 42 - patch_number: 1 - git_hash: "hash" - producer: "producer" -} - -# ----- - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: relation { -# CHECK-NEXT: %[[V0:.*]] = named_table @t1 as ["a", "b"] : tuple -# CHECK-NEXT: yield %[[V0]] : tuple -relations { - rel { - read { - common { - direct { - } - } - base_schema { - names: "a" - names: "b" - struct { - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} - -# ----- - -# CHECK-LABEL: substrait.plan -# CHECK-NEXT: extension_uri @extension_uri.1 at "http://url.1/with/extensions.yml" -# CHECK-NEXT: extension_uri @extension_uri.2 at "http://url.2/with/extensions.yml" -# CHECK-NEXT: extension_uri @extension_uri.42 at "http://url.42/with/extensions.yml" -# CHECK-NEXT: extension_uri @extension_uri.0 at "http://some.url/with/extensions.yml" -# CHECK-NEXT: extension_uri @extension_uri.3 at "http://url.foo/with/extensions.yml" -# CHECK-NEXT: extension_uri @extension_uri.4 at 
"http://url.bar/with/extensions.yml" -# CHECK-NEXT: extension_function @extension_function.1 at @extension_uri.1["func1"] -# CHECK-NEXT: extension_function @extension_function.42 at @extension_uri.42["func42"] -# CHECK-NEXT: extension_type @extension_type.1 at @extension_uri.2["type1"] -# CHECK-NEXT: extension_type @extension_type.42 at @extension_uri.2["type42"] -# CHECK-NEXT: extension_type_variation @extension_type_variation.1 at @extension_uri.1["typevar1"] -# CHECK-NEXT: extension_type_variation @extension_type_variation.42 at @extension_uri.1["typevar2"] -# CHECK-NEXT: } - -extension_uris { - extension_uri_anchor: 1 - uri: "http://url.1/with/extensions.yml" -} -extension_uris { - extension_uri_anchor: 2 - uri: "http://url.2/with/extensions.yml" -} -extension_uris { - extension_uri_anchor: 42 - uri: "http://url.42/with/extensions.yml" -} -extension_uris { - uri: "http://some.url/with/extensions.yml" -} -extension_uris { - extension_uri_anchor: 3 - uri: "http://url.foo/with/extensions.yml" -} -extension_uris { - extension_uri_anchor: 4 - uri: "http://url.bar/with/extensions.yml" -} -extensions { - extension_function { - extension_uri_reference: 1 - function_anchor: 1 - name: "func1" - } -} -extensions { - extension_function { - extension_uri_reference: 42 - function_anchor: 42 - name: "func42" - } -} -extensions { - extension_type { - extension_uri_reference: 2 - type_anchor: 1 - name: "type1" - } -} -extensions { - extension_type { - extension_uri_reference: 2 - type_anchor: 42 - name: "type42" - } -} -extensions { - extension_type_variation { - extension_uri_reference: 1 - type_variation_anchor: 1 - name: "typevar1" - } -} -extensions { - extension_type_variation { - extension_uri_reference: 1 - type_variation_anchor: 42 - name: "typevar2" - } -} -version { - minor_number: 42 - patch_number: 1 -} diff --git a/test/Target/SubstraitPB/Import/project.textpb b/test/Target/SubstraitPB/Import/project.textpb deleted file mode 100644 index c42ee7026687..000000000000 
--- a/test/Target/SubstraitPB/Import/project.textpb +++ /dev/null @@ -1,65 +0,0 @@ -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: | FileCheck %s - -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: | structured-translate -substrait-to-protobuf \ -# RUN: | structured-translate -protobuf-to-substrait \ -# RUN: | FileCheck %s - -# CHECK: substrait.plan version 0 : 42 : 1 { -# CHECK-NEXT: relation -# CHECK: %[[V0:.*]] = named_table -# CHECK-NEXT: %[[V1:.*]] = project %[[V0]] : tuple -> tuple { -# CHECK-NEXT: ^[[BB0:.*]](%[[ARG0:.*]]: tuple): -# CHECK-NEXT: %[[V2:.*]] = literal -1 : si1 -# CHECK-NEXT: %[[V3:.*]] = literal 42 : si32 -# CHECK-NEXT: yield %[[V2]], %[[V3]] : si1, si32 -# CHECK-NEXT: } -# CHECK-NEXT: yield %[[V1]] : - -relations { - rel { - project { - common { - direct { - } - } - input { - read { - common { - direct { - } - } - base_schema { - names: "a" - struct { - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } - expressions { - literal { - boolean: true - } - } - expressions { - literal { - i32: 42 - } - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} diff --git a/test/Target/SubstraitPB/Import/types.textpb b/test/Target/SubstraitPB/Import/types.textpb deleted file mode 100644 index cd6e7e49aab4..000000000000 --- a/test/Target/SubstraitPB/Import/types.textpb +++ /dev/null @@ -1,92 +0,0 @@ -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" \ -# RUN: | FileCheck %s - -# RUN: structured-translate -protobuf-to-substrait %s \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | structured-translate -substrait-to-protobuf \ -# RUN: --split-input-file --output-split-marker="# ""-----" \ -# RUN: | structured-translate -protobuf-to-substrait \ -# RUN: --split-input-file="# ""-----" --output-split-marker="// -----" \ -# RUN: | FileCheck %s - -# 
CHECK: substrait.plan -# CHECK-NEXT: relation -# CHECK-NEXT: named_table -# CHECK-SAME: : tuple - -relations { - rel { - read { - common { - direct { - } - } - base_schema { - names: "a" - struct { - types { - i32 { - nullability: NULLABILITY_REQUIRED - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} - -# ----- - -# CHECK: substrait.plan -# CHECK-NEXT: relation -# CHECK-NEXT: named_table -# CHECK-SAME: : tuple> - -relations { - rel { - read { - common { - direct { - } - } - base_schema { - names: "a" - names: "b" - names: "c" - struct { - types { - bool { - nullability: NULLABILITY_REQUIRED - } - } - types { - struct { - types { - bool { - nullability: NULLABILITY_REQUIRED - } - } - } - } - nullability: NULLABILITY_REQUIRED - } - } - named_table { - names: "t1" - } - } - } -} -version { - minor_number: 42 - patch_number: 1 -} diff --git a/test/Target/SubstraitPB/protobuf-formats.mlir b/test/Target/SubstraitPB/protobuf-formats.mlir deleted file mode 100644 index 42ec93f528bf..000000000000 --- a/test/Target/SubstraitPB/protobuf-formats.mlir +++ /dev/null @@ -1,52 +0,0 @@ -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: -substrait-protobuf-format=text \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: -substrait-protobuf-format=text \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: -substrait-protobuf-format=text \ -// RUN: | FileCheck --check-prefix=CHECK-TEXT %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: -substrait-protobuf-format=binary \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: -substrait-protobuf-format=binary \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: -substrait-protobuf-format=binary \ -// RUN: | FileCheck --check-prefix=CHECK-BINARY %s - -// RUN: structured-translate 
-substrait-to-protobuf %s \ -// RUN: -substrait-protobuf-format=json \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: -substrait-protobuf-format=json \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: -substrait-protobuf-format=json \ -// RUN: | FileCheck --check-prefix=CHECK-JSON %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: -substrait-protobuf-format=pretty-json \ -// RUN: | structured-translate -protobuf-to-substrait \ -// RUN: -substrait-protobuf-format=pretty-json \ -// RUN: | FileCheck %s - -// RUN: structured-translate -substrait-to-protobuf %s \ -// RUN: -substrait-protobuf-format=pretty-json \ -// RUN: | FileCheck --check-prefix=CHECK-PRETTYJSON %s - -substrait.plan version 0 : 42 : 1 {} -// CHECK: substrait.plan version 0 : 42 : 1 { - -// CHECK-TEXT: version { -// CHECK-TEXT-NEXT: minor_number: 42 - -// CHECK-BINARY: 2 - -// CHECK-JSON: {"version":{"minorNumber":42,"patchNumber":1}} - -// CHECK-PRETTYJSON: "version": { -// CHECK-PRETTYJSON-NEXT: "minorNumber": 42, diff --git a/test/Transforms/Substrait/emit-deduplication.mlir b/test/Transforms/Substrait/emit-deduplication.mlir deleted file mode 100644 index 0a7d53213e8f..000000000000 --- a/test/Transforms/Substrait/emit-deduplication.mlir +++ /dev/null @@ -1,321 +0,0 @@ -// RUN: structured-opt -split-input-file %s \ -// RUN: -substrait-emit-deduplication -allow-unregistered-dialect \ -// RUN: | FileCheck %s - -// `cross` op with left `emit` input with duplicates. 
- -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [1, 0] from %[[V0]] : -// CHECK-NEXT: %[[V2:.*]] = cross %[[V1]] x %[[V0]] : -// CHECK-NEXT: %[[V3:.*]] = emit [0, 0, 1, 1, 0, 2, 3] from %[[V2]] : -// CHECK-NEXT: yield %[[V3]] : tuple - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 1, 0, 0, 1] from %0 : tuple -> tuple - %2 = cross %1 x %0 : tuple x tuple - yield %2 : tuple - } -} - -// ----- - -// `cross` op with left `emit` input without duplicates. - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [1, 0] from %[[V0]] : -// CHECK-NEXT: %[[V2:.*]] = cross %[[V0]] x %[[V1]] : -// CHECK-NEXT: yield %[[V2]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 0] from %0 : tuple -> tuple - %2 = cross %0 x %1 : tuple x tuple - yield %2 : tuple - } -} - -// ----- - -// `cross` op with right `emit` input with duplicates. - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [1, 0] from %[[V0]] : -// CHECK-NEXT: %[[V2:.*]] = cross %[[V0]] x %[[V1]] : -// CHECK-NEXT: %[[V3:.*]] = emit [0, 1, 2, 2, 3, 3, 2] from %[[V2]] : -// CHECK-NEXT: yield %[[V3]] : tuple - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 1, 0, 0, 1] from %0 : tuple -> tuple - %2 = cross %0 x %1 : tuple x tuple - yield %2 : tuple - } -} - -// ----- - -// `cross` op with right `emit` input without duplicates. 
- -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [1, 0] from %[[V0]] : -// CHECK-NEXT: %[[V2:.*]] = cross %[[V1]] x %[[V0]] : -// CHECK-NEXT: yield %[[V2]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 0] from %0 : tuple -> tuple - %2 = cross %1 x %0 : tuple x tuple - yield %2 : tuple - } -} - -// ----- - -// `cross` op with two `emit` inputs with duplicates. - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-DAG: %[[V1:.*]] = emit [1] from %[[V0]] : -// CHECK-DAG: %[[V2:.*]] = emit [0] from %[[V0]] : -// CHECK-NEXT: %[[V3:.*]] = cross %[[V1]] x %[[V2]] : -// CHECK-NEXT: %[[V4:.*]] = emit [0, 0, 1, 1] from %[[V3]] : -// CHECK-NEXT: yield %[[V4]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 1] from %0 : tuple -> tuple - %2 = emit [0, 0] from %0 : tuple -> tuple - %3 = cross %1 x %2 : tuple x tuple - yield %3 : tuple - } -} - -// ----- - -// `cross` op with mixed `emit` duplicates/no duplicates inputs. - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-DAG: %[[V1:.*]] = emit [1, 0] from %[[V0]] : -// CHECK-DAG: %[[V2:.*]] = emit [0] from %[[V0]] : -// CHECK-NEXT: %[[V3:.*]] = cross %[[V1]] x %[[V2]] : -// CHECK-NEXT: %[[V4:.*]] = emit [0, 1, 2, 2] from %[[V3]] : -// CHECK-NEXT: yield %[[V4]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 0] from %0 : tuple -> tuple - %2 = emit [0, 0] from %0 : tuple -> tuple - %3 = cross %1 x %2 : tuple x tuple - yield %3 : tuple - } -} - -// ----- - -// `cross` op with mixed `emit` duplicates/no duplicates inputs. 
- -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-DAG: %[[V1:.*]] = emit [1, 0] from %[[V0]] : -// CHECK-DAG: %[[V2:.*]] = emit [1] from %[[V0]] : -// CHECK-NEXT: %[[V3:.*]] = cross %[[V2]] x %[[V1]] : -// CHECK-NEXT: %[[V4:.*]] = emit [0, 0, 1, 2] from %[[V3]] : -// CHECK-NEXT: yield %[[V4]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 1] from %0 : tuple -> tuple - %2 = emit [1, 0] from %0 : tuple -> tuple - %3 = cross %1 x %2 : tuple x tuple - yield %3 : tuple - } -} - -// ----- - -// `filter` op (`PushDuplicatesThroughFilterPattern`). - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [1, 2, 0] from %[[V0]] : -// CHECK-NEXT: %[[V2:.*]] = filter %[[V1]] : {{.*}} { -// CHECK-NEXT: ^{{.*}}(%[[ARG0:.*]]: [[TYPE:.*]]): -// CHECK-NEXT: %[[V3:.*]] = field_reference %[[ARG0]][0] : [[TYPE]] -// CHECK-NEXT: %[[V5:.*]] = field_reference %[[ARG0]][1, 0] : [[TYPE]] -// CHECK-NEXT: %[[V6:.*]] = field_reference %[[ARG0]][1] : [[TYPE]] -// CHECK-NEXT: %[[V7:.*]] = field_reference %[[V6]][1] : -// CHECK-NEXT: %[[V9:.*]] = field_reference %[[ARG0]][2] : [[TYPE]] -// CHECK-NEXT: %[[Va:.*]] = "test.op"(%[[V3]], %[[V3]], %[[V5]], %[[V7]], %[[V3]], %[[V9]]) -// CHECK-NEXT: yield %[[Va]] : si1 -// CHECK-NEXT: } -// CHECK-NEXT: %[[Vb:.*]] = emit [0, 0, 1, 0, 2] from %[[V2]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b", "c", "d", "e"] : tuple> - // Fields in position 1 and 3 are duplicates of field in position 0, so we - // expect all references to the former to be replaced by the latter and an - // `emit` re-establishing the original fields after the `filter`. 
- %1 = emit [1, 1, 2, 1, 0] from %0 - : tuple> -> tuple, si1, si1> - %2 = filter %1 : tuple, si1, si1> { - ^bb0(%arg0: tuple, si1, si1>): - %3 = field_reference %arg0[0] : tuple, si1, si1> - %4 = field_reference %arg0[1] : tuple, si1, si1> - %5 = field_reference %arg0[2, 0] : tuple, si1, si1> - %6 = field_reference %arg0[2] : tuple, si1, si1> - %7 = field_reference %6[1] : tuple - %8 = field_reference %arg0[3] : tuple, si1, si1> - %9 = field_reference %arg0[4] : tuple, si1, si1> - %a = "test.op"(%3, %4, %5, %7, %8, %9) : (si1, si1, si1, si32, si1, si1) -> si1 - yield %a : si1 - } - yield %2 : tuple, si1, si1> - } -} - -// ----- - -// `project` op (`PushDuplicatesThroughProjectPattern`). - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [1] from %[[V0]] : -// CHECK-NEXT: %[[V2:.*]] = project %[[V1]] : tuple -> tuple { -// CHECK-NEXT: ^{{.*}}(%[[ARG0:.*]]: [[TYPE:.*]]): -// CHECK-NEXT: %[[V3:.*]] = field_reference %[[ARG0]][0] : [[TYPE]] -// CHECK-NEXT: %[[V5:.*]] = "test.op"(%[[V3]], %[[V3]]) : -// CHECK-NEXT: yield %[[V5]] : si1 -// CHECK-NEXT: } -// CHECK-NEXT: %[[V6:.*]] = emit [0, 0, 1] from %[[V2]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 1] from %0 : tuple -> tuple - %2 = project %1 : tuple -> tuple { - ^bb0(%arg : tuple): - %3 = field_reference %arg[0] : tuple - %4 = field_reference %arg[1] : tuple - %5 = "test.op"(%3, %4) : (si32, si32) -> si1 - yield %5 : si1 - } - yield %2 : tuple - } -} - -// ----- - -// `project` op (`EliminateDuplicateYieldsInProjectPattern`). 
- -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = project %[[V0]] : {{.*}} { -// CHECK-NEXT: ^{{.*}}(%[[ARG0:.*]]: [[TYPE:.*]]): -// CHECK-NEXT: %[[V2:.*]] = field_reference %[[ARG0]][0] : [[TYPE]] -// CHECK-NEXT: %[[V3:.*]] = "test.op"(%[[V2]]) : -// CHECK-NEXT: yield %[[V3]] : si1 -// CHECK-NEXT: } -// CHECK-NEXT: %[[V4:.*]] = emit [0, 1, 1] from %[[V1]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a"] : tuple - %1 = project %0 : tuple -> tuple { - ^bb0(%arg : tuple): - %2 = field_reference %arg[0] : tuple - %3 = "test.op"(%2) : (si32) -> si1 - // We yield two times the same value. This pattern should remove one of - // the two and re-establish the duplicate with an `amit` after the - // `project`. - yield %3, %3 : si1, si1 - } - yield %1 : tuple - } -} - -// ----- - -// `project` op (`EliminateIdentityYieldsInProjectPattern`). - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = project %[[V0]] : {{.*}} { -// CHECK-NEXT: ^{{.*}}(%[[ARG0:.*]]: [[TYPE:.*]]): -// CHECK-NEXT: %[[V2:.*]] = field_reference %[[ARG0]][0] : [[TYPE]] -// CHECK-NEXT: %[[V3:.*]] = "test.op"(%[[V2]]) : -// CHECK-NEXT: yield %[[V3]] : si1 -// CHECK-NEXT: } -// CHECK-NEXT: %[[V4:.*]] = emit [0, 1, 0, 2] from %[[V1]] - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = project %0 : tuple -> tuple { - ^bb0(%arg0: tuple): - %2 = field_reference %arg0[0] : tuple - %3 = "test.op"(%2) : (si32) -> si1 - // `%2` yields an input field without modifications. This pattern removes - // that yielding and re-establishes the duplicated field with an `emit` - // following the `project` instead. - yield %2, %3 : si32, si1 - } - yield %1 : tuple - } -} - -// ----- - -// End-to-end test of many patterns related to `project`. 
-// -// The example has duplicates in various places: (1) duplicate emit field in -// `%1`, (2) those are forwarded in the unmofified fields of the `project` in -// `%2`, (3) the two `field_references` ultimately refer to the same field, -// so (4) the `yield` of the `project` op yields duplicates, which are (5) -// both duplicates of the existing fields of the input to `project`. Through -// repeated pattern application, each duplicate is removed, making the next one -// obivous, until the `project` is empty and folded away. - -// CHECK-LABEL: substrait.plan -// CHECK-NEXT: relation -// CHECK-NEXT: %[[V0:.*]] = named_table -// CHECK-NEXT: %[[V1:.*]] = emit [1, 1, 1, 1] from %[[V0]] : -// CHECK-NEXT: yield %[[V1]] : tuple - -substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t1 as ["a", "b"] : tuple - %1 = emit [1, 1] from %0 : tuple -> tuple - %2 = project %1 : tuple -> tuple { - ^bb0(%arg : tuple): - %3 = field_reference %arg[0] : tuple - %4 = field_reference %arg[1] : tuple - yield %3, %4 : si32, si32 - } - yield %2 : tuple - } -} diff --git a/test/python/dialects/substrait/dialect.py b/test/python/dialects/substrait/dialect.py deleted file mode 100644 index 8791885efdab..000000000000 --- a/test/python/dialects/substrait/dialect.py +++ /dev/null @@ -1,51 +0,0 @@ -# RUN: %PYTHON %s | FileCheck %s - -from mlir_structured.dialects import substrait as ss -from mlir_structured import ir - - -def run(f): - print("\nTEST:", f.__name__) - with ir.Context(), ir.Location.unknown(): - ss.register_dialect() - f() - return f - - -# CHECK-LABEL: TEST: testSubstraitDialect -@run -def testSubstraitDialect(): - plan = ss.PlanOp(version=(0, 42, 1)) - print(plan) - # CHECK: substrait.plan - - -# CHECK-LABEL: TEST: testPlanOp -@run -def testPlanOp(): - plan = ss.PlanOp(0, 42, 1) - print(plan) - # CHECK: substrait.plan version 0 : 42 : 1 - plan = ss.PlanOp(version=(0, 42, 1)) - print(plan) - # CHECK: substrait.plan version 0 : 42 : 1 - - -# CHECK-LABEL: TEST: 
testNamedTable -@run -def testNamedTable(): - plan = ss.PlanOp(version=(0, 42, 1)) - - with ir.InsertionPoint(plan.body): - plan_rel = ss.PlanRelOp() - with ir.InsertionPoint(plan_rel.body): - si32 = ir.IntegerType.get_signed(32) - result_type = ir.TupleType.get_tuple([si32, si32]) - field_names = ir.ArrayAttr.get([ir.StringAttr.get(n) for n in ["a", "b"]]) - named_table = ss.NamedTableOp(result_type, "t", field_names) - ss.YieldOp(named_table) - - print(plan) - # CHECK: substrait.plan - # CHECK: relation { - # CHECK: named_table @t diff --git a/test/python/dialects/substrait/e2e_datafusion.py b/test/python/dialects/substrait/e2e_datafusion.py deleted file mode 100644 index 05da2fca6354..000000000000 --- a/test/python/dialects/substrait/e2e_datafusion.py +++ /dev/null @@ -1,54 +0,0 @@ -# RUN: %PYTHON %s | FileCheck %s - -import datafusion -from datafusion import substrait as dfss -import pyarrow as pa - -from mlir_structured.dialects import substrait as ss -from mlir_structured import ir - - -def run(f): - print("\nTEST:", f.__name__) - with ir.Context(), ir.Location.unknown(): - ss.register_dialect() - f() - return f - - -# CHECK-LABEL: TEST: testNamedTable -@run -def testNamedTable(): - # Set up test table. - ctx = datafusion.SessionContext() - columns = {"a": [1, 2, 3], "b": [7, 8, 9]} - schema = pa.schema([('a', pa.int32()), ('b', pa.int32())]) - batch = pa.RecordBatch.from_pydict(columns, schema=schema) - ctx.register_record_batches("t", [[batch]]) - - # Set up test plan in MLIR. - plan = ir.Module.parse(''' - substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t as ["a", "b"] : tuple - yield %0 : tuple - } - } - ''') - - # Export MLIR plan to protobuf. - pb_plan = ss.to_binpb(plan.operation) - pb_plan = pb_plan.encode('utf8') - - # Import plan in datafusion, execute, and print result. 
- ss_plan = dfss.substrait.serde.deserialize_bytes(pb_plan) - df_plan = dfss.substrait.consumer.from_substrait_plan(ctx, ss_plan) - df = ctx.create_dataframe_from_logical_plan(df_plan) - - print(df.to_arrow_table()) - # CHECK-NEXT: pyarrow.Table - # CHECK-NEXT: a: int32 - # CHECK-NEXT: b: int32 - # CHECK-NEXT: ---- - # CHECK-NEXT{LITERAL}: a: [[1,2,3]] - # CHECK-NEXT{LITERAL}: b: [[7,8,9]] diff --git a/test/python/dialects/substrait/e2e_duckdb.py b/test/python/dialects/substrait/e2e_duckdb.py deleted file mode 100644 index c26b6fd2d331..000000000000 --- a/test/python/dialects/substrait/e2e_duckdb.py +++ /dev/null @@ -1,52 +0,0 @@ -# RUN: %PYTHON %s | FileCheck %s - -import duckdb - -from mlir_structured.dialects import substrait as ss -from mlir_structured import ir - - -def run(f): - print("\nTEST:", f.__name__) - with ir.Context(), ir.Location.unknown(): - ss.register_dialect() - f() - return f - - -# CHECK-LABEL: TEST: testNamedTable -@run -def testNamedTable(): - # Set up test table. - con = duckdb.connect() - con.install_extension("substrait") - con.load_extension("substrait") - - con.execute(query="CREATE TABLE t (a INT NOT NULL, b INT NOT NULL)") - con.execute(query="INSERT INTO t VALUES (1, 7)") - con.execute(query="INSERT INTO t VALUES (2, 8)") - con.execute(query="INSERT INTO t VALUES (3, 9)") - - # Set up test plan in MLIR. - plan = ir.Module.parse(''' - substrait.plan version 0 : 42 : 1 { - relation as ["a", "b"] { - %0 = named_table @t as ["a", "b"] : tuple - yield %0 : tuple - } - } - ''') - - # Export MLIR plan to protobuf. - pb_plan = ss.to_binpb(plan.operation).encode() - - # Execute in duckdb and print result. 
- query_result = con.from_substrait(proto=pb_plan) - - print(query_result.to_arrow_table()) - # CHECK-NEXT: pyarrow.Table - # CHECK-NEXT: a: int32 - # CHECK-NEXT: b: int32 - # CHECK-NEXT: ---- - # CHECK-NEXT{LITERAL}: a: [[1,2,3]] - # CHECK-NEXT{LITERAL}: b: [[7,8,9]] diff --git a/test/python/dialects/substrait/e2e_ibis.py b/test/python/dialects/substrait/e2e_ibis.py deleted file mode 100644 index 9e7680a5dca8..000000000000 --- a/test/python/dialects/substrait/e2e_ibis.py +++ /dev/null @@ -1,36 +0,0 @@ -# RUN: %PYTHON %s | FileCheck %s - -import ibis -from ibis_substrait.compiler import core as ibis_ss - -from mlir_structured.dialects import substrait as ss -from mlir_structured import ir - - -def run(f): - print("\nTEST:", f.__name__) - with ir.Context(), ir.Location.unknown(): - ss.register_dialect() - f() - return f - - -# CHECK-LABEL: TEST: testNamedTable -@run -def testNamedTable(): - # Set up test table. - table = ibis.table([("a", "int32"), ("b", "int32")], "t") - - # Create Substrait plan from Ibis expression. - compiler = ibis_ss.SubstraitCompiler() - pb_plan = compiler.compile(table) - - # Import into MLIR and print. 
- plan = ss.from_binpb(pb_plan.SerializeToString()) - print(plan) - - # CHECK-NEXT: module - # CHECK-NEXT: substrait.plan version {{.*}} producer "ibis-substrait" { - # CHECK-NEXT: relation as ["a", "b"] { - # CHECK-NEXT: %[[V0:.*]] = named_table @t as ["a", "b"] : tuple - # CHECK-NEXT: yield %[[V0]] : tuple diff --git a/test/python/dialects/substrait/e2e_pyarrow.py b/test/python/dialects/substrait/e2e_pyarrow.py deleted file mode 100644 index 119f9dbb7286..000000000000 --- a/test/python/dialects/substrait/e2e_pyarrow.py +++ /dev/null @@ -1,50 +0,0 @@ -# RUN: %PYTHON %s | FileCheck %s - -import pyarrow as pa -import pyarrow.lib -import pyarrow.substrait - -from mlir_structured.dialects import substrait as ss -from mlir_structured import ir - - -def run(f): - print("\nTEST:", f.__name__) - with ir.Context(), ir.Location.unknown(): - ss.register_dialect() - f() - return f - - -# CHECK-LABEL: TEST: testNamedTable -@run -def testNamedTable(): - plan = ir.Module.parse(''' - substrait.plan version 0 : 42 : 1 { - relation { - %0 = named_table @t as ["a", "b"] : tuple - yield %0 : tuple - } - } - ''') - - def table_provider(names, schema): - if names != ["t"]: - raise Exception("Unrecognized table name") - columns = {"a": [1, 2, 3], "b": [7, 8, 9]} - schema = pa.schema([('a', pa.int32()), ('b', pa.int32())]) - return pa.Table.from_pydict(columns, schema=schema) - - # Export MLIR to protobuf. - pb_plan = ss.to_binpb(plan.operation) - - # Execute in pyrrow and print result. 
- reader = pa.substrait.run_query(pa.lib.tobytes(pb_plan), - table_provider=table_provider) - print(reader.read_all()) - # CHECK-NEXT: pyarrow.Table - # CHECK-NEXT: a: int32 - # CHECK-NEXT: b: int32 - # CHECK-NEXT: ---- - # CHECK-NEXT{LITERAL}: a: [[1,2,3]] - # CHECK-NEXT{LITERAL}: b: [[7,8,9]] diff --git a/test/python/dialects/substrait/translate.py b/test/python/dialects/substrait/translate.py deleted file mode 100644 index 4d49de4b6048..000000000000 --- a/test/python/dialects/substrait/translate.py +++ /dev/null @@ -1,108 +0,0 @@ -# RUN: %PYTHON %s 2>&1 | FileCheck %s - -import json - -from mlir_structured.dialects import substrait as ss, arith -from mlir_structured.ir import Context, Location - -JSON_PLAN = ''' - { - "version": { - "minorNumber": 42, - "patchNumber": 1, - } - } -''' - - -def run(f): - print("\nTEST:", f.__name__) - with Context(), Location.unknown(): - ss.register_dialect() - f() - return f - - -# CHECK-LABEL: TEST: testJsonFormat -@run -def testJsonFormat(): - plan_module = ss.from_json(JSON_PLAN) - print(plan_module) - # CHECK: substrait.plan version - - json_plan = ss.to_json(plan_module.operation) - print(json_plan) - # CHECK: {"version":{"minorNumber":42,"patchNumber":1}} - - plan_op = plan_module.body.operations[0] - print(plan_op.to_json()) - # CHECK: {"version":{"minorNumber":42,"patchNumber":1}} - - json_plan = json.dumps(json.loads(json_plan)) - print(json_plan) - # CHECK: {"version": {"minorNumber": 42, "patchNumber": 1}} - - json_plan = ss.to_json(plan_module.operation, pretty=True) - print(json_plan) - # CHECK: "version": { - # CHECK-NEXT: "minorNumber": 42, - # CHECK-NEXT: "patchNumber": 1 - - -# CHECK-LABEL: TEST: testTextPB -@run -def testTextPB(): - plan_module = ss.from_json(JSON_PLAN) - - text_plan = ss.to_textpb(plan_module.operation) - print(text_plan) - # CHECK: version { - # CHECK-NEXT: minor_number: 42 - # CHECK-NEXT: patch_number: 1 - - plan_op = plan_module.body.operations[0] - print(plan_op.to_textpb()) - # CHECK: 
version { - - plan_module = ss.from_textpb(text_plan) - print(plan_module) - # CHECK: substrait.plan version - - -# CHECK-LABEL: TEST: testBinPB -@run -def testBinPB(): - plan_module = ss.from_json(JSON_PLAN) - - bin_plan = ss.to_binpb(plan_module.operation) - print(bin_plan) - # CHECK: 2 - - plan_op = plan_module.body.operations[0] - print(plan_op.to_binpb()) - # CHECK: 2 - - plan_module = ss.from_binpb(bin_plan) - print(plan_module) - # CHECK: substrait.plan version - - -# CHECK-LABEL: TEST: testInvalid -@run -def testInvalid(): - try: - ss.from_json('this is not json') - # CHECK-NEXT: error: could not deserialize JSON as 'Plan' message: - # CHECK-NEXT: Unexpected token. - # CHECK-NEXT: this is not json - except ValueError as ex: - print(ex) - # CHECK: Could not import Substrait plan - - const_op = arith.ConstantOp.create_index(42) - try: - ss.to_json(const_op) - # CHECK-NEXT: error: 'arith.constant' op not supported for export - except ValueError as ex: - print(ex) - # CHECK-NEXT: Could not export Substrait plan diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt deleted file mode 100644 index 2c21a79ef926..000000000000 --- a/third_party/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -################################################################################ -# Substrait -################################################################################ - -# The way how Substrait uses sanitizers seems to be incompatible with some flags -# set by LLVM, leading to linker errors, so we deactivate them. -set(SUBSTRAIT_CPP_SANITIZE_DEBUG_BUILD OFF) - -# In install mode, Abseil creates a target call `check`, which conflicts with an -# LLVM target with the same name. In non-install mode, the target name changes. -set(ABSL_ENABLE_INSTALL OFF) - -# LLVM sets this to `ON`, leading to a compilation error due to a file called -# `time.h` in one of Abseil's folders. 
-set(CMAKE_INCLUDE_CURRENT_DIR OFF) - -if(${LLVM_INCLUDE_TESTS}) - message(FATAL_ERROR "LLVM_INCLUDE_TESTS is enabled but breaks the build due " - "to a target name clash with 'abseil-cpp'. Please run " - "cmake again with '-DLLVM_INCLUDE_TESTS=OFF'.") -endif() - -# Add `substrait-cpp` as a subdirectory with above settings. -set(SUBSTRAIT_CPP_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/substrait-cpp) -add_subdirectory(${SUBSTRAIT_CPP_ROOT_DIR}) diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp deleted file mode 160000 index 1dbf98b548de..000000000000 --- a/third_party/substrait-cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1dbf98b548de3ef11cac2a42075b87f57e7004b9 diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index a6246560ac55..8141e43d64b1 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -3,4 +3,3 @@ add_dependencies(structured-all structured-tools) add_subdirectory(structured-lsp-server) add_subdirectory(structured-opt) -add_subdirectory(structured-translate) diff --git a/tools/structured-lsp-server/structured-lsp-server.cpp b/tools/structured-lsp-server/structured-lsp-server.cpp index 1449e7331048..4c04f545e4f2 100644 --- a/tools/structured-lsp-server/structured-lsp-server.cpp +++ b/tools/structured-lsp-server/structured-lsp-server.cpp @@ -23,7 +23,6 @@ #include "structured/Conversion/Passes.h" #include "structured/Dialect/Iterators/IR/Iterators.h" #include "structured/Dialect/Iterators/Transforms/Passes.h" -#include "structured/Dialect/Substrait/IR/Substrait.h" #include "structured/Dialect/Tabular/IR/Tabular.h" #include "structured/Dialect/Tuple/IR/Tuple.h" #include "structured/Dialect/Tuple/Transforms/Passes.h" @@ -42,10 +41,6 @@ static void registerIteratorDialects(DialectRegistry ®istry) { >(); } -static void registerSubstraitDialects(DialectRegistry ®istry) { - registry.insert(); -} - int main(int argc, char **argv) { #ifndef NDEBUG static std::string executable = @@ -62,7 +57,6 @@ int main(int argc, char **argv) { 
registerAllDialects(registry); registerAllExtensions(registry); registerIteratorDialects(registry); - registerSubstraitDialects(registry); return mlir::failed(mlir::MlirLspServerMain(argc, argv, registry)); } diff --git a/tools/structured-opt/structured-opt.cpp b/tools/structured-opt/structured-opt.cpp index 054e607856bc..141b7453a0a5 100644 --- a/tools/structured-opt/structured-opt.cpp +++ b/tools/structured-opt/structured-opt.cpp @@ -19,8 +19,6 @@ #include "structured/Conversion/Passes.h" #include "structured/Dialect/Iterators/IR/Iterators.h" #include "structured/Dialect/Iterators/Transforms/Passes.h" -#include "structured/Dialect/Substrait/IR/Substrait.h" -#include "structured/Dialect/Substrait/Transforms/Passes.h" #include "structured/Dialect/Tabular/IR/Tabular.h" #include "structured/Dialect/Tuple/IR/Tuple.h" #include "structured/Dialect/Tuple/Transforms/Passes.h" @@ -32,7 +30,6 @@ #include "llvm/Support/ToolOutputFile.h" using namespace mlir; -using namespace mlir::substrait; static void registerIteratorDialects(DialectRegistry ®istry) { registry.insert< @@ -44,10 +41,6 @@ static void registerIteratorDialects(DialectRegistry ®istry) { >(); } -static void registerSubstraitDialects(DialectRegistry ®istry) { - registry.insert(); -} - int main(int argc, char **argv) { #ifndef NDEBUG static std::string executable = @@ -58,14 +51,12 @@ int main(int argc, char **argv) { registerAllPasses(); registerStructuredConversionPasses(); registerIteratorsPasses(); - registerSubstraitPasses(); registerTuplePasses(); DialectRegistry registry; registerAllDialects(registry); registerAllExtensions(registry); registerIteratorDialects(registry); - registerSubstraitDialects(registry); return failed( MlirOptMain(argc, argv, "MLIR modular optimizer driver\n", registry)); diff --git a/tools/structured-translate/CMakeLists.txt b/tools/structured-translate/CMakeLists.txt deleted file mode 100644 index 01ee0fb9af4d..000000000000 --- a/tools/structured-translate/CMakeLists.txt +++ /dev/null 
@@ -1,25 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Support - ) - -get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS ) -get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) - -add_llvm_executable(structured-translate - structured-translate.cpp - ) -add_dependencies(structured-tools structured-translate) - -target_link_libraries(structured-translate - PRIVATE - ${dialect_libs} - ${translation_libs} - MLIRIR - MLIRParser - MLIRPass - MLIRTargetSubstraitPB - MLIRTranslateLib - MLIRSupport - ) - -mlir_check_all_link_libraries(structured-translate) diff --git a/tools/structured-translate/structured-translate.cpp b/tools/structured-translate/structured-translate.cpp deleted file mode 100644 index 018a6a880b64..000000000000 --- a/tools/structured-translate/structured-translate.cpp +++ /dev/null @@ -1,76 +0,0 @@ -//===-- structured-translate.cpp - "structured" mlir-translate --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// `mlir-stranslate` with translations to and from "structured" dialects, i.e., -// dialects from this repository. 
-//===----------------------------------------------------------------------===// - -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/Operation.h" -#include "mlir/InitAllTranslations.h" -#include "mlir/Support/LogicalResult.h" -#include "mlir/Tools/mlir-translate/MlirTranslateMain.h" -#include "mlir/Tools/mlir-translate/Translation.h" -#include "structured/Dialect/Substrait/IR/Substrait.h" -#include "structured/Target/SubstraitPB/Export.h" -#include "structured/Target/SubstraitPB/Import.h" -#include "structured/Target/SubstraitPB/Options.h" -#include "llvm/Support/GraphWriter.h" -#include "llvm/Support/raw_ostream.h" - -namespace mlir { -namespace substrait { - -llvm::cl::opt substraitProtobufFormat( - "substrait-protobuf-format", llvm::cl::ValueRequired, - llvm::cl::desc( - "Serialization format used when translating Substrait plans."), - llvm::cl::values( - clEnumValN(SerdeFormat::kText, "text", "human-readable text format"), - clEnumValN(SerdeFormat::kBinary, "binary", "binary wire format"), - clEnumValN(SerdeFormat::kJson, "json", "compact JSON format"), - clEnumValN(SerdeFormat::kPrettyJson, "pretty-json", - "JSON format with new lines")), - llvm::cl::init(SerdeFormat::kText)); - -static void registerSubstraitDialects(DialectRegistry ®istry) { - registry.insert(); -} - -void registerSubstraitToProtobufTranslation() { - TranslateFromMLIRRegistration registration( - "substrait-to-protobuf", "translate from Substrait MLIR to protobuf", - [&](mlir::Operation *op, llvm::raw_ostream &output) { - ImportExportOptions options; - options.serdeFormat = substraitProtobufFormat.getValue(); - return translateSubstraitToProtobuf(op, output, options); - }, - registerSubstraitDialects); -} - -void registerProtobufToSubstraitTranslation() { - TranslateToMLIRRegistration registration( - "protobuf-to-substrait", "translate from protobuf to Substrait MLIR", - [&](llvm::StringRef input, mlir::MLIRContext *context) { - ImportExportOptions options; - options.serdeFormat = 
substraitProtobufFormat.getValue(); - return translateProtobufToSubstrait(input, context, options); - }, - registerSubstraitDialects); -} - -} // namespace substrait -} // namespace mlir - -int main(int argc, char **argv) { - mlir::registerAllTranslations(); - mlir::substrait::registerSubstraitToProtobufTranslation(); - mlir::substrait::registerProtobufToSubstraitTranslation(); - - return failed( - mlir::mlirTranslateMain(argc, argv, "MLIR Translation Testing Tool")); -}