Skip to content

Commit

Permalink
Merge branch 'main' into thread_groups_for_conv
Browse files Browse the repository at this point in the history
  • Loading branch information
newling authored Aug 26, 2024
2 parents 46bb421 + 9ab9ea0 commit fae3b49
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 67 deletions.
93 changes: 93 additions & 0 deletions .github/workflows/ci-macos.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# CI workflow for macOS runners: installs dependencies, runs
# build_tools/ci/build_test_cpp.sh, packages the install tree as an artifact,
# and restores/saves a build cache around the build.
name: CI MacOS

on:
  workflow_call:
  workflow_dispatch:
  pull_request:
  merge_group:
  push:
    branches:
      - main

concurrency:
  # The PR number for pull requests, otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit).
  group: ci-build-test-cpp-macos-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  build_and_ctest:
    name: Build and Test (${{ matrix.runs-on }}, ASSERTIONS)
    runs-on: ${{ matrix.runs-on }}
    strategy:
      # Let the other matrix entry finish even if one runner image fails.
      fail-fast: false
      matrix:
        runs-on: [macos-12, macos-14]
    env:
      CACHE_DIR: ${{ github.workspace }}/.container-cache
    steps:
      - name: Set unified TZ
        # NOTE(review): the action reference was garbled by e-mail obfuscation
        # in the page this was recovered from; the action name and version tag
        # are reconstructed - confirm the pinned version against the repository.
        uses: szenius/set-timezone@v2.0
        with:
          # this is an arbitrary choice
          timezoneLinux: "Asia/Singapore"
          timezoneMacos: "Asia/Singapore"
          timezoneWindows: "Singapore Standard Time"

      - name: "Checking out repository"
        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
        with:
          submodules: recursive

      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install deps
        run: |
          brew install ccache ninja

      - name: Sync source deps
        run: |
          python ./sync_deps.py

      - name: Python deps
        run: |
          pip install "numpy<2" pyyaml "pybind11[global]==2.10.3" nanobind

      # Restore only; the matching save happens in the final "Save cache" step.
      - name: Enable cache
        uses: actions/cache/restore@v3
        with:
          path: ${{ env.CACHE_DIR }}
          # without datetime stamps you'll get collisions for the cache warming runs
          # ("Failed to save: Unable to reserve cache with key ..., another job may be creating this cache.")
          key: ${{ matrix.runs-on }}-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }}
          restore-keys: ${{ matrix.runs-on }}-build-test-cpp-

      - name: Build packages
        run: |
          export cache_dir="${{ env.CACHE_DIR }}"
          bash build_tools/ci/build_test_cpp.sh

      # Runs unless the workflow was cancelled, so a failed build still
      # attempts to package whatever was produced.
      - name: Create artifacts
        if: ${{ !cancelled() }}
        run: |
          rm -f iree-install/bin/clang*
          rm -f iree-install/bin/llvm-link*
          tar cf iree-dist-${{ matrix.runs-on }}.tar -C iree-install . -C ../iree-build tools/testing/e2e/iree-e2e-matmul-test

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        if: ${{ !cancelled() }}
        with:
          name: ${{ matrix.runs-on }}_release_packages
          path: iree-dist-${{ matrix.runs-on }}.tar
          if-no-files-found: warn

      # Same key as the restore step above.
      - name: Save cache
        uses: actions/cache/save@v3
        if: ${{ !cancelled() }}
        with:
          path: ${{ env.CACHE_DIR }}
          key: ${{ matrix.runs-on }}-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }}
17 changes: 13 additions & 4 deletions build_tools/ci/build_test_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,12 @@ echo '{
}' > $iree_dir/CMakeUserPresets.json

cd $iree_dir
cmake -S "$iree_dir" -B "$build_dir" \
CMAKE_ARGS="\
-S $iree_dir \
-B $build_dir \
-GNinja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX="$install_dir" \
-DCMAKE_INSTALL_PREFIX=$install_dir \
-DCMAKE_INSTALL_LIBDIR=lib \
-DIREE_ENABLE_ASSERTIONS=ON \
-DIREE_BUILD_SAMPLES=OFF \
Expand All @@ -74,8 +76,13 @@ cmake -S "$iree_dir" -B "$build_dir" \
-DIREE_INPUT_STABLEHLO=OFF \
-DIREE_INPUT_TORCH=OFF \
-DCMAKE_OBJECT_PATH_MAX=4096 \
-DIREE_CMAKE_PLUGIN_PATHS=../iree-amd-aie \
-DIREE_EXTERNAL_HAL_DRIVERS=xrt
-DIREE_CMAKE_PLUGIN_PATHS=$PWD/../iree-amd-aie"

if [[ "$OSTYPE" != "darwin"* ]]; then
CMAKE_ARGS="$CMAKE_ARGS -DIREE_EXTERNAL_HAL_DRIVERS=xrt"
fi

cmake $CMAKE_ARGS

echo "Building all"
echo "------------"
Expand All @@ -90,6 +97,8 @@ echo "CTest"
echo "-----"
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j
elif [[ "$OSTYPE" == "darwin"* ]]; then
ctest --test-dir "$build_dir" -R amd-aie -E "pack_peel_pipeline_matmul|conv_fill_spec_pad" --output-on-failure -j --repeat until-pass:5
else
# hack while windows is flaky to get past failing tests
ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j --repeat until-pass:5
Expand Down
1 change: 1 addition & 0 deletions compiler/plugins/target/AMD-AIE/aievec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ iree_cc_library(
::AIEVecOpsGen
::AIEVecDialectGen
::AIEVecAttrsGen
::AIEVecXLLVMOpsGen
MLIRIR
)

Expand Down
62 changes: 33 additions & 29 deletions compiler/plugins/target/AMD-AIE/air/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,6 @@ iree_cc_library(
# AIR Dialect
###############################################################################

# AIR dialect IR library: builds AIRDialect.cpp from the AIR source tree and
# depends on the generated dialect, interface and transform-op targets.
iree_cc_library(
  NAME
    AIRDialectIR
  SRCS
    ${IREE_MLIR_AIR_SOURCE_DIR}/lib/Dialect/AIR/IR/AIRDialect.cpp
  DEPS
    ::defs
    ::AIRDialectGen
    ::AIRInterfaceGen
    ::AIRTransformOpsGen
    MLIRIR
)

iree_tablegen_library(
NAME
AIRDialectGen
Expand All @@ -60,6 +47,26 @@ iree_tablegen_library(
-gen-op-interface-defs Dialect/AIR/AIROpInterfaces.cpp.inc
)


# Tablegen target for the AIR conversion passes: processes
# air/Conversion/Passes.td with -gen-pass-decls and writes
# Conversion/Passes.h.inc.
iree_tablegen_library(
  NAME
    AIRConversionPassesIncGen
  TD_FILE
    "${IREE_MLIR_AIR_SOURCE_DIR}/include/air/Conversion/Passes.td"
  OUTS
    -gen-pass-decls Conversion/Passes.h.inc
)

# Tablegen target for the AIR transform ops: processes AIRTransformOps.td and
# writes both the op declarations (.h.inc) and op definitions (.cpp.inc).
iree_tablegen_library(
  NAME
    AIRTransformOpsGen
  TD_FILE
    "${IREE_MLIR_AIR_SOURCE_DIR}/include/air/Dialect/AIR/AIRTransformOps.td"
  OUTS
    -gen-op-decls Dialect/AIR/AIRTransformOps.h.inc
    -gen-op-defs Dialect/AIR/AIRTransformOps.cpp.inc
)

iree_cc_library(
NAME
AIRTransformOps
Expand All @@ -70,19 +77,24 @@ iree_cc_library(
::AIRDialectIR
::AIRTransformOpsGen
::AIRTransformPasses
::AIRConversionPassesIncGen
iree::target::amd-aie::aie::AIEDialectIR
MLIRIR
MLIRLinalgTransformOps
)

iree_tablegen_library(
iree_cc_library(
NAME
AIRTransformOpsGen
TD_FILE
"${IREE_MLIR_AIR_SOURCE_DIR}/include/air/Dialect/AIR/AIRTransformOps.td"
OUTS
-gen-op-decls Dialect/AIR/AIRTransformOps.h.inc
-gen-op-defs Dialect/AIR/AIRTransformOps.cpp.inc
AIRDialectIR
SRCS
${IREE_MLIR_AIR_SOURCE_DIR}/lib/Dialect/AIR/IR/AIRDialect.cpp
DEPS
::defs
::AIRDialectGen
::AIRInterfaceGen
::AIRTransformOpsGen
::AIRConversionPassesIncGen
MLIRIR
)

###############################################################################
Expand Down Expand Up @@ -121,15 +133,6 @@ iree_tablegen_library(
# AIR Conversion Passes
###############################################################################

# Tablegen target for the AIR conversion passes: processes
# air/Conversion/Passes.td with -gen-pass-decls and writes
# Conversion/Passes.h.inc.
iree_tablegen_library(
  NAME
    AIRConversionPassesIncGen
  TD_FILE
    "${IREE_MLIR_AIR_SOURCE_DIR}/include/air/Conversion/Passes.td"
  OUTS
    -gen-pass-decls Conversion/Passes.h.inc
)

iree_cc_library(
NAME
AIRConversionPassHeaders
Expand Down Expand Up @@ -164,6 +167,7 @@ iree_cc_library(
MLIRTransforms
)

# Load the iree_aie_utils module before the helper call below
# (presumably where replace_string_in_file is defined - confirm).
include(iree_aie_utils)

# NOTE(review): looks like this substitutes "aie" for "aie/Dialect/AIEX/IR"
# inside PassDetail.h in the AIR source tree - confirm against the helper's
# definition, including whether it edits the file in place.
replace_string_in_file(
  ${IREE_MLIR_AIR_SOURCE_DIR}/include/air/Conversion/PassDetail.h
  "aie/Dialect/AIEX/IR"
  "aie"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,14 @@ FailureOr<AIE::ObjectFifoCreateOp> createObjectFifo(
1, std::multiplies<>());
int64_t targetSize = std::accumulate(targetShape.begin(), targetShape.end(),
1, std::multiplies<>());
// TODO(jornt) for now, memory space 1 is used for objectfifos. Maybe refactor
// `aie.objectfifo` in the future to support different memory spaces.
MemRefType memrefType =
sourceSize < targetSize
? MemRefType::get({sourceSize}, srcType.getElementType(),
MemRefLayoutAttrInterface{},
rewriter.getI64IntegerAttr(1))
srcType.getMemorySpace())
: MemRefType::get({targetSize}, dstType.getElementType(),
MemRefLayoutAttrInterface{},
rewriter.getI64IntegerAttr(1));
dstType.getMemorySpace());
AIE::AIEObjectFifoType dtype = AIE::AIEObjectFifoType::get(memrefType);
auto fifo = rewriter.create<AIE::ObjectFifoCreateOp>(
rewriter.getUnknownLoc(), symName, srcTile, dstTiles,
Expand Down Expand Up @@ -204,10 +202,9 @@ LogicalResult accessOpToAIE(IRRewriter &rewriter,
}

auto type = cast<MemRefType>(oldReinterpretOp.getResult().getType());
// TODO(jornt): for now, memory space 1 is used for objectFifos. Refactor
// `aie.objectfifo` to support different memory spaces to avoid hardcoding.
MemRefType newType =
MemRefType::Builder(type).setMemorySpace(rewriter.getI64IntegerAttr(1));

MemRefType newType = MemRefType::Builder(type);

llvm::ArrayRef<int64_t> sizes = newType.getShape();
auto [strides, baseOffset] = getStridesAndOffset(newType);
auto reinterpretOp = rewriter.create<memref::ReinterpretCastOp>(
Expand All @@ -229,6 +226,7 @@ LogicalResult acquireOpToAIE(IRRewriter &rewriter,
IRMapping &mapper,
SmallVector<Operation *> &toBeErased) {
LLVM_DEBUG(llvm::dbgs() << "Convert [AMDAIE::LogicalObjectFifoAcquire]\n");

OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPoint(acquireOp);
auto dmaOp =
Expand All @@ -244,20 +242,25 @@ LogicalResult acquireOpToAIE(IRRewriter &rewriter,
return acquireOp.emitError()
<< "input isn't mapped to an `aie.objectifo` operation";
}
AIE::AIEObjectFifoType ofTy =
cast<AIE::AIEObjectFifoType>(objFifo.getElemType());
MemRefType elementType = MemRefType::Builder(ofTy.getElementType())
.setMemorySpace(rewriter.getI64IntegerAttr(1));

auto acquireOpType = dyn_cast<LogicalObjectFifoType>(acquireOp.getType());
assert(acquireOpType &&
"Expected LogicalObjectFifoAcquire to have type "
"LogicalObjectFifoType");
MemRefType elementType = acquireOpType.getElementType();

auto subviewType = AIE::AIEObjectFifoSubviewType::get(elementType);
AIE::ObjectFifoPort port =
acquireOp.getPort() == LogicalObjectFifoPort::Produce
? AIE::ObjectFifoPort::Produce
: AIE::ObjectFifoPort::Consume;
auto objFifoAquireOp = rewriter.create<AIE::ObjectFifoAcquireOp>(
rewriter.getUnknownLoc(), subviewType, port, objFifo.getName(), 1);

auto subviewOp = rewriter.create<AIE::ObjectFifoSubviewAccessOp>(
rewriter.getUnknownLoc(), elementType, objFifoAquireOp.getSubview(),
rewriter.getIntegerAttr(rewriter.getI32Type(), 0));

// Map acquire op to new acquire + subview op.
mapper.map(acquireOp.getOperation(), subviewOp.getOperation());
mapper.map(acquireOp.getResult(), subviewOp.getOutput());
Expand Down Expand Up @@ -1008,7 +1011,7 @@ class AMDAIELowerToAIEPass
}

AMDAIELowerToAIEPass() = default;
AMDAIELowerToAIEPass(const AMDAIELowerToAIEPass &pass) {};
AMDAIELowerToAIEPass(const AMDAIELowerToAIEPass &pass){};
void runOnOperation() override;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
// -----

// CHECK: aie.device
// CHECK-DAG: func.func private @ukernel_A(memref<i32, 1>, index) attributes {llvm.bareptr = true}
// CHECK-DAG: func.func private @ukernel_B(memref<i32, 1>, index, memref<f32, 1>, index) attributes {llvm.bareptr = true}
// CHECK-DAG: func.func private @ukernel_A(memref<i32, 2>, index) attributes {llvm.bareptr = true}
// CHECK-DAG: func.func private @ukernel_B(memref<i32, 2>, index, memref<f32, 2>, index) attributes {llvm.bareptr = true}
// CHECK-DAG: %[[TILE_0_2:.+]] = aie.tile(0, 2)
// CHECK: aie.core(%[[TILE_0_2]])
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
Expand All @@ -233,11 +233,11 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
// CHECK-SAME: Produce
// CHECK: %[[ACCESS0:.+]] = aie.objectfifo.subview.access %[[ACQUIRE0]]
// CHECK: %[[REINTERPRET0:.+]] = memref.reinterpret_cast %[[ACCESS0]]
// CHECK: linalg.fill ins(%{{.+}} : i32) outs(%[[REINTERPRET]] : memref<32x32xi32, 1>)
// CHECK: linalg.fill ins(%{{.+}} : i32) outs(%[[REINTERPRET]] : memref<32x32xi32, 2>)
// CHECK: %[[BASE_BUFFER:.*]], %{{.+}}, %{{.+}}:2, %{{.+}}:2 = memref.extract_strided_metadata %[[REINTERPRET]] :
// CHECK: %[[BASE_BUFFER0:.*]], %{{.+}}, %{{.+}}:2, %{{.+}}:2 = memref.extract_strided_metadata %[[REINTERPRET0]] :
// CHECK: func.call @ukernel_A(%[[BASE_BUFFER]], %[[C0]]) : (memref<i32, 1>, index) -> ()
// CHECK: func.call @ukernel_B(%[[BASE_BUFFER]], %[[C0]], %[[BASE_BUFFER0]], %[[C0]]) : (memref<i32, 1>, index, memref<f32, 1>, index) -> ()
// CHECK: func.call @ukernel_A(%[[BASE_BUFFER]], %[[C0]]) : (memref<i32, 2>, index) -> ()
// CHECK: func.call @ukernel_B(%[[BASE_BUFFER]], %[[C0]], %[[BASE_BUFFER0]], %[[C0]]) : (memref<i32, 2>, index, memref<f32, 2>, index) -> ()
// CHECK: aie.end
// CHECK: } {link_with = "/path/to/ukernel.o"}
// CHECK: aiex.runtime_sequence @lower_to_aie_ukernel
Expand Down Expand Up @@ -738,10 +738,10 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
// CHECK: aie.device(npu1_4col) {
// CHECK: %[[TILE_0_0:.*]] = aie.tile(0, 0)
// CHECK: %[[TILE_0_1:.*]] = aie.tile(0, 1)
// CHECK: aie.objectfifo @[[OBJ0:.*]](%[[TILE_0_0]], {%[[TILE_0_1]]}, 2 : i32) : !aie.objectfifo<memref<1024xbf16, 1>>
// CHECK: aie.objectfifo @[[OBJ1:.*]](%[[TILE_0_0]], {%[[TILE_0_1]]}, 2 : i32) : !aie.objectfifo<memref<1024xbf16, 1>>
// CHECK: aie.objectfifo @[[OBJ0:.*]](%[[TILE_0_0]], {%[[TILE_0_1]]}, 2 : i32) : !aie.objectfifo<memref<1024xbf16, 1 : i32>>
// CHECK: aie.objectfifo @[[OBJ1:.*]](%[[TILE_0_0]], {%[[TILE_0_1]]}, 2 : i32) : !aie.objectfifo<memref<1024xbf16, 1 : i32>>
// CHECK: aie.objectfifo @[[OBJ2:.*]](%[[TILE_0_1]]
// CHECK-SAME: %[[TILE_0_0]]}, 2 : i32) : !aie.objectfifo<memref<1024xf32, 1>>
// CHECK-SAME: %[[TILE_0_0]]}, 2 : i32) : !aie.objectfifo<memref<1024xf32>>
// CHECK: aiex.runtime_sequence @bf16_f32_lit_test
// CHECK-SAME: (%[[LHS:.*]]: memref<32x32xbf16>, %[[RHS:.*]]: memref<32x32xbf16>, %[[OUT:.*]]: memref<32x32xf32>) {
// CHECK: aiex.npu.dma_memcpy_nd
Expand Down
2 changes: 1 addition & 1 deletion iree_compiler_plugin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ endif()

if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
include(iree_aie_xrt)
include(iree_aie_bootgen)
endif()
include(iree_aie_bootgen)

add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/compiler/plugins/target/AMD-AIE target/AMD-AIE)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/tests/samples AMD-AIE/tests/samples)
Expand Down
Loading

0 comments on commit fae3b49

Please sign in to comment.