Skip to content

Commit

Permalink
[DAP] Add iir_simd operation for DAP dialect.
Browse files Browse the repository at this point in the history
  • Loading branch information
taiqzheng committed Oct 29, 2023
1 parent 19cfc6c commit abc66ab
Show file tree
Hide file tree
Showing 4 changed files with 259 additions and 12 deletions.
8 changes: 8 additions & 0 deletions frontend/Interfaces/buddy/DAP/DSP/IIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,17 @@ void _mlir_ciface_mlir_iir(MemRef<float, 1> *inputBuddyConv1D,
MemRef<float, 2> *kernelBuddyConv1D,
MemRef<float, 1> *outputBuddyConv1D);

// Double-precision SIMD IIR entry point: takes a rank-1 input, a rank-2
// kernel and a rank-1 output MemRef, all with `double` elements.
// NOTE(review): presumably the C-interface wrapper of an MLIR function named
// `mlir_iir_simd`; its definition is not visible here — confirm.
void _mlir_ciface_mlir_iir_simd(MemRef<double, 1> *inputBuddyConv1D,
MemRef<double, 2> *kernelBuddyConv1D,
MemRef<double, 1> *outputBuddyConv1D);

void _mlir_ciface_buddy_iir(MemRef<float, 1> *inputBuddyConv1D,
MemRef<float, 2> *kernelBuddyConv1D,
MemRef<float, 1> *outputBuddyConv1D);

// Double-precision SIMD IIR entry point: takes a rank-1 input, a rank-2
// kernel and a rank-1 output MemRef, all with `double` elements.
// NOTE(review): presumably the C-interface wrapper generated for the
// `@buddy_iir_simd` function in frontend/Interfaces/lib/DAP.mlir — confirm.
void _mlir_ciface_buddy_iir_simd(MemRef<double, 1> *inputBuddyConv1D,
MemRef<double, 2> *kernelBuddyConv1D,
MemRef<double, 1> *outputBuddyConv1D);
}
} // namespace detail

Expand Down
5 changes: 5 additions & 0 deletions frontend/Interfaces/lib/DAP.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ func.func @buddy_iir(%in : memref<?xf32>, %filter : memref<?x?xf32>, %out : memr
return
}

// Thin wrapper that forwards its three f64 memref arguments (1-D input,
// 2-D filter, 1-D output) to a single `dap.iir_simd` operation.
func.func @buddy_iir_simd(%in : memref<?xf64>, %filter : memref<?x?xf64>, %out : memref<?xf64>) -> () {
dap.iir_simd %in, %filter, %out : memref<?xf64>, memref<?x?xf64>, memref<?xf64>
return
}

func.func @buddy_biquad(%in : memref<?xf32>, %filter : memref<?xf32>, %out : memref<?xf32>) -> () {
dap.biquad %in, %filter, %out : memref<?xf32>, memref<?xf32>, memref<?xf32>
return
Expand Down
19 changes: 19 additions & 0 deletions midend/include/Dialect/DAP/DAPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,23 @@ def DAP_IirOp : DAP_Op<"iir"> {
}];
}

// SIMD variant of the IIR filter operation. Takes the input signal, the
// filter kernel and the output buffer as memrefs and produces no results.
def DAP_IirSimdOp : DAP_Op<"iir_simd"> {
  let summary = "SIMD version of the infinite impulse response (IIR) filter";
  let description = [{
    An infinite impulse response (IIR) filter. Unlike FIR filters, IIR
    filters have a feedback (recursive) part. This is the SIMD version of
    the `iir` operation.

    ```mlir
      dap.iir_simd %input, %kernel, %output : memref<?xf64>, memref<?x?xf64>,
                                              memref<?xf64>
    ```
  }];
  // The input and the kernel are only read. The output buffer is written by
  // the lowering, so it carries a write effect; marking it read-only would
  // make the op look side-effect free to generic transformations.
  let arguments = (ins Arg<AnyRankedOrUnrankedMemRef, "inputMemref",
                           [MemRead]>:$memrefI,
                       Arg<AnyRankedOrUnrankedMemRef, "kernelMemref",
                           [MemRead]>:$memrefK,
                       Arg<AnyRankedOrUnrankedMemRef, "outputMemref",
                           [MemWrite]>:$memrefO);
  let assemblyFormat = [{
    $memrefI `,` $memrefK `,` $memrefO attr-dict `:` type($memrefI) `,` type($memrefK) `,` type($memrefO)
  }];
}

#endif // DAP_DAPOPS_TD
239 changes: 227 additions & 12 deletions midend/lib/Conversion/LowerDAP/LowerDAPPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"

#include "DAP/DAPDialect.h"
#include "DAP/DAPOps.h"
Expand Down Expand Up @@ -282,25 +282,26 @@ class DAPIirLowering : public OpRewritePattern<dap::IirOp> {
// process the remain data of FIR part
Value idx1 = builder.create<SubIOp>(loc, upperN, c1);
Value idx2 = builder.create<SubIOp>(loc, upperN, c2);
Value in1 =
Value in1 =
builder.create<memref::LoadOp>(loc, input, ValueRange{idx1});
Value in2 =
Value in2 =
builder.create<memref::LoadOp>(loc, input, ValueRange{idx2});

builder.create<scf::ForOp>(
loc, upperN, N, c1, ValueRange{in1, in2},
loc, upperN, N, c1, ValueRange{in1, in2},
[&](OpBuilder &builder, Location loc, Value iv,
ValueRange itrargs) {
Value in0 =
Value in0 =
builder.create<memref::LoadOp>(loc, input, ValueRange{iv});

Value temp0 = builder.create<MulFOp>(loc, b0, in0);
Value temp1 = builder.create<MulFOp>(loc, b1, in1);
Value temp2 = builder.create<MulFOp>(loc, b2, in2);
Value sum0 = builder.create<AddFOp>(loc, temp0, temp1);
Value sum1 = builder.create<AddFOp>(loc, sum0, temp2);

builder.create<memref::StoreOp>(loc, sum1, output, ValueRange{iv});

builder.create<memref::StoreOp>(loc, sum1, output,
ValueRange{iv});

builder.create<scf::YieldOp>(loc, std::vector<Value>{in0, in1});
});
Expand Down Expand Up @@ -334,13 +335,226 @@ class DAPIirLowering : public OpRewritePattern<dap::IirOp> {
int64_t stride;
};

/// Lowers `dap.iir_simd` to scf/vector/arith/memref operations.
///
/// Each kernel row is read as {b0, b1, b2, a0, a1, a2} (a0 is ignored) and
/// placed in one lane of five 16-lane f64 coefficient vectors; lanes without
/// a kernel row keep b0 = 1 and all other coefficients 0. The filter is then
/// evaluated as a 16-stage pipeline: on every step the previous output
/// vector is shifted by one lane, the next input sample is inserted in lane
/// 0, and the sample leaving lane 15 is stored to the output.
///
/// NOTE(review): the emitted code does not guard its bounds. It loads
/// input[0..14] unconditionally and uses `N - 15` as a loop bound, so it
/// assumes the input holds at least 15 samples. It also inserts kernel row
/// `i` into lane `i`, so it assumes at most 16 kernel rows, each with at
/// least 6 columns.
class DAPIirSimdLowering : public OpRewritePattern<dap::IirSimdOp> {
public:
  using OpRewritePattern<dap::IirSimdOp>::OpRewritePattern;

  /// `strideParam` is stored but not used by this lowering; the vector
  /// length is currently fixed to 16 lanes.
  explicit DAPIirSimdLowering(MLIRContext *context, int64_t strideParam)
      : OpRewritePattern(context) {
    stride = strideParam;
  }

  /// Replaces `op` by the distribution loop plus the three pipeline loops
  /// (injection, full processing, tail) and erases it. Always succeeds.
  LogicalResult matchAndRewrite(dap::IirSimdOp op,
                                PatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    auto ctx = op->getContext();

    Value input = op->getOperand(0);
    Value kernel = op->getOperand(1);
    Value output = op->getOperand(2);

    Value c0 = rewriter.create<ConstantIndexOp>(loc, 0);
    Value c1 = rewriter.create<ConstantIndexOp>(loc, 1);
    Value c2 = rewriter.create<ConstantIndexOp>(loc, 2);
    Value c4 = rewriter.create<ConstantIndexOp>(loc, 4);
    Value c5 = rewriter.create<ConstantIndexOp>(loc, 5);
    // TODO : Change the vector length value to an Attribute
    // Number of pipeline stages minus one (vector length is 16).
    Value c15 = rewriter.create<ConstantIndexOp>(loc, 15);
    // Lane from which finished samples are extracted (the last lane).
    Value i15 =
        rewriter.create<arith::ConstantIntOp>(loc, /*value=*/15, /*width=*/64);

    Value N = rewriter.create<memref::DimOp>(loc, input, c0);
    Value filterSize = rewriter.create<memref::DimOp>(loc, kernel, c0);

    FloatType f64 = FloatType::getF64(ctx);
    VectorType vectorTy64 = VectorType::get(16, f64);

    // Plain double literals: the `d` suffix is a non-standard extension.
    Value f0 = rewriter.create<ConstantFloatOp>(loc, APFloat(0.0), f64);
    Value f1 = rewriter.create<ConstantFloatOp>(loc, APFloat(1.0), f64);

    // Identity section in every lane: b0 = 1, b1 = b2 = a1 = a2 = 0.
    Value initB0 = rewriter.create<vector::SplatOp>(loc, vectorTy64, f1);
    Value initB1 = rewriter.create<vector::SplatOp>(loc, vectorTy64, f0);
    Value initB2 = rewriter.create<vector::SplatOp>(loc, vectorTy64, f0);
    Value initA1 = rewriter.create<vector::SplatOp>(loc, vectorTy64, f0);
    Value initA2 = rewriter.create<vector::SplatOp>(loc, vectorTy64, f0);

    // Distribute all params into 5 param vectors: kernel row `iv` goes to
    // lane `iv` of each coefficient vector.
    auto vecDistribute = rewriter.create<scf::ForOp>(
        loc, c0, filterSize, c1,
        ValueRange{initB0, initB1, initB2, initA1, initA2},
        [&](OpBuilder &builder, Location loc, Value iv, ValueRange iargs) {
          Value b0 =
              builder.create<memref::LoadOp>(loc, kernel, ValueRange{iv, c0});
          Value b1 =
              builder.create<memref::LoadOp>(loc, kernel, ValueRange{iv, c1});
          Value b2 =
              builder.create<memref::LoadOp>(loc, kernel, ValueRange{iv, c2});
          // Value a0 of kernel (column 3) is not used
          Value a1 =
              builder.create<memref::LoadOp>(loc, kernel, ValueRange{iv, c4});
          Value a2 =
              builder.create<memref::LoadOp>(loc, kernel, ValueRange{iv, c5});

          Value B0_next =
              builder.create<vector::InsertElementOp>(loc, b0, iargs[0], iv);
          Value B1_next =
              builder.create<vector::InsertElementOp>(loc, b1, iargs[1], iv);
          Value B2_next =
              builder.create<vector::InsertElementOp>(loc, b2, iargs[2], iv);
          Value A1_next =
              builder.create<vector::InsertElementOp>(loc, a1, iargs[3], iv);
          Value A2_next =
              builder.create<vector::InsertElementOp>(loc, a2, iargs[4], iv);

          builder.create<scf::YieldOp>(
              loc,
              std::vector<Value>{B0_next, B1_next, B2_next, A1_next, A2_next});
        });

    Value vecB0 = vecDistribute.getResult(0);
    Value vecB1 = vecDistribute.getResult(1);
    Value vecB2 = vecDistribute.getResult(2);
    Value vecA1 = vecDistribute.getResult(3);
    Value vecA2 = vecDistribute.getResult(4);

    // Pipeline state carried through the loops: {output, s1, s2} vectors.
    Value vecOut = rewriter.create<vector::SplatOp>(loc, vectorTy64, f0);
    Value vecS1 = rewriter.create<vector::SplatOp>(loc, vectorTy64, f0);
    Value vecS2 = rewriter.create<vector::SplatOp>(loc, vectorTy64, f0);

    // Shuffle mask moving lane i to lane i + 1; lane 0 is overwritten by the
    // new input sample right after the shuffle.
    const int64_t shiftMask[] = {0, 0, 1, 2,  3,  4,  5,  6,
                                 7, 8, 9, 10, 11, 12, 13, 14};

    // Emits one pipeline step and the terminating scf.yield.
    //   inElem   - scalar fed into lane 0 of the pipeline.
    //   state    - loop-carried {output, s1, s2} vectors.
    //   storeIdx - output index for the sample leaving lane 15, or a null
    //              Value while the pipeline is still filling up.
    auto emitPipelineStep = [&](OpBuilder &b, Location l, Value inElem,
                                ValueRange state, Value storeIdx) {
      Value shifted = b.create<vector::ShuffleOp>(
          l, state[0], state[0], ArrayRef<int64_t>(shiftMask));
      Value vecIn = b.create<vector::InsertElementOp>(l, inElem, shifted, c0);

      // out = b0 * in + s1
      Value nextOut = b.create<vector::FMAOp>(l, vecB0, vecIn, state[1]);
      if (storeIdx) {
        Value outElem = b.create<vector::ExtractElementOp>(l, nextOut, i15);
        b.create<memref::StoreOp>(l, outElem, output, storeIdx);
      }

      // s1 = (b1 * in + s2) - a1 * out
      Value s1Lhs = b.create<vector::FMAOp>(l, vecB1, vecIn, state[2]);
      Value s1Rhs = b.create<arith::MulFOp>(l, vecA1, nextOut);
      Value nextS1 = b.create<arith::SubFOp>(l, s1Lhs, s1Rhs);

      // s2 = b2 * in - a2 * out
      Value s2Lhs = b.create<arith::MulFOp>(l, vecB2, vecIn);
      Value s2Rhs = b.create<arith::MulFOp>(l, vecA2, nextOut);
      Value nextS2 = b.create<arith::SubFOp>(l, s2Lhs, s2Rhs);

      b.create<scf::YieldOp>(l, std::vector<Value>{nextOut, nextS1, nextS2});
    };

    // The SIMD version of the IIR operation can be represented as a pipeline
    // with 16 stages. This loop is the injection section: it runs
    // {stages - 1} times and produces no output yet.
    auto injectionResult = rewriter.create<scf::ForOp>(
        loc, c0, c15, c1, ValueRange{vecOut, vecS1, vecS2},
        [&](OpBuilder &builder, Location loc, Value iv, ValueRange iargs) {
          Value inElem = builder.create<memref::LoadOp>(loc, input, iv);
          emitPipelineStep(builder, loc, inElem, iargs, /*storeIdx=*/Value());
        });

    Value upperBound = rewriter.create<arith::SubIOp>(loc, N, c15);

    // This loop is the full processing section: it consumes input[iv + 15]
    // and starts to produce output[iv].
    auto processResult = rewriter.create<scf::ForOp>(
        loc, c0, upperBound, c1,
        ValueRange{injectionResult.getResult(0), injectionResult.getResult(1),
                   injectionResult.getResult(2)},
        [&](OpBuilder &builder, Location loc, Value iv, ValueRange iargs) {
          Value index = builder.create<arith::AddIOp>(loc, iv, c15);
          Value inElem = builder.create<memref::LoadOp>(loc, input, index);
          emitPipelineStep(builder, loc, inElem, iargs, /*storeIdx=*/iv);
        });

    // This loop is the tail section: zeros are fed in so that the samples
    // still inside the pipeline are flushed to output[N - 15 .. N).
    rewriter.create<scf::ForOp>(
        loc, upperBound, N, c1,
        ValueRange{processResult.getResult(0), processResult.getResult(1),
                   processResult.getResult(2)},
        [&](OpBuilder &builder, Location loc, Value iv, ValueRange iargs) {
          emitPipelineStep(builder, loc, f0, iargs, /*storeIdx=*/iv);
        });

    rewriter.eraseOp(op);
    return success();
  }

private:
  // Kept so that all DAP lowering patterns share the same constructor
  // signature; not read by this lowering.
  [[maybe_unused]] int64_t stride;
};

} // end anonymous namespace

/// Adds all DAP lowering patterns to `patterns`. The FIR pattern is built
/// from the context alone; the biquad, IIR and SIMD IIR patterns also
/// receive `stride`.
void populateLowerDAPConversionPatterns(RewritePatternSet &patterns,
                                        int64_t stride) {
  MLIRContext *ctx = patterns.getContext();
  patterns.add<DAPFirLowering>(ctx);
  // Patterns sharing the (context, stride) constructor are registered in one
  // call, in the same order as before.
  patterns.add<DAPBiquadLowering, DAPIirLowering, DAPIirSimdLowering>(ctx,
                                                                      stride);
}

//===----------------------------------------------------------------------===//
Expand All @@ -363,7 +577,8 @@ class LowerDAPPass : public PassWrapper<LowerDAPPass, OperationPass<ModuleOp>> {
void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<buddy::dap::DAPDialect, func::FuncDialect,
memref::MemRefDialect, scf::SCFDialect, VectorDialect,
affine::AffineDialect, arith::ArithDialect,linalg::LinalgDialect>();
affine::AffineDialect, arith::ArithDialect,
linalg::LinalgDialect>();
}
Option<int64_t> stride{*this, "DAP-vector-splitting",
llvm::cl::desc("Vector splitting size."),
Expand All @@ -376,10 +591,10 @@ void LowerDAPPass::runOnOperation() {
ModuleOp module = getOperation();

ConversionTarget target(*context);
target.addLegalDialect<affine::AffineDialect, scf::SCFDialect,
func::FuncDialect, memref::MemRefDialect,
VectorDialect, arith::ArithDialect,
linalg::LinalgDialect>();
target
.addLegalDialect<affine::AffineDialect, scf::SCFDialect,
func::FuncDialect, memref::MemRefDialect, VectorDialect,
arith::ArithDialect, linalg::LinalgDialect>();
target.addLegalOp<ModuleOp, func::FuncOp, func::ReturnOp>();

RewritePatternSet patterns(context);
Expand Down

0 comments on commit abc66ab

Please sign in to comment.