Skip to content

Commit

Permalink
Revert "WIP"
Browse files Browse the repository at this point in the history
This reverts commit fd992c4.
  • Loading branch information
zero9178 committed Jul 29, 2024
1 parent efe3439 commit 978d0ab
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 654 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ def TileSizeList : OptionalArrayRefParameter<"int64_t"> {
}];
}

defvar BankAlignmentList = TileSizeList;

def QuidditchSnitch_LoweringConfigAttr : QuidditchSnitch_Attr<"LoweringConfig",
[DeclareAttrInterfaceMethods<IREECodegen_LoweringConfigAttrInterface, [
"getWorkgroupTileSizes",
Expand All @@ -55,16 +53,14 @@ def QuidditchSnitch_LoweringConfigAttr : QuidditchSnitch_Attr<"LoweringConfig",
let parameters = (ins
TileSizeList:$workgroup_tiles,
TileSizeList:$l1_tiles,
DefaultValuedParameter<"bool", "false">:$dual_buffer,
BankAlignmentList:$bank_alignments
DefaultValuedParameter<"bool", "false">:$dual_buffer
);

let builders = [
AttrBuilder<(ins
CArg<"llvm::ArrayRef<int64_t>", "{}">:$workgroupTiles,
CArg<"llvm::ArrayRef<int64_t>", "{}">:$l1Tiles,
CArg<"bool", "false">:$dualBuffer,
CArg<"llvm::ArrayRef<int64_t>", "{}">:$bankAlignments
CArg<"bool", "false">:$dualBuffer
)>
];

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@ static ArrayRef<int64_t> dropTrailingZeros(ArrayRef<int64_t> array) {
LoweringConfigAttr LoweringConfigAttr::get(MLIRContext *context,
ArrayRef<int64_t> workgroupTiles,
ArrayRef<int64_t> l1Tiles,
bool dualBuffer,
ArrayRef<int64_t> bankAlignments) {
bool dualBuffer) {
return Base::get(context, dropTrailingZeros(workgroupTiles),
dropTrailingZeros(l1Tiles), dualBuffer, bankAlignments);
dropTrailingZeros(l1Tiles), dualBuffer);
}

//===----------------------------------------------------------------------===//
Expand Down
57 changes: 0 additions & 57 deletions codegen/compiler/src/Quidditch/Target/AvoidBankConflicts.cpp

This file was deleted.

1 change: 0 additions & 1 deletion codegen/compiler/src/Quidditch/Target/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ iree_cc_library(
"Passes.h"
"Passes.h.inc"
SRCS
"AvoidBankConflicts.cpp"
"ConvertToLLVM.cpp"
"ConfigureForSnitch.cpp"
"DisableQuidditchVariant.cpp"
Expand Down
4 changes: 0 additions & 4 deletions codegen/compiler/src/Quidditch/Target/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,4 @@ def ConvertToLLVMPass : Pass<"quidditch-convert-to-llvm", "mlir::ModuleOp"> {
];
}

def AvoidBankConflictsPass : Pass<"quidditch-avoid-bank-conflicts"> {

}

#endif
5 changes: 0 additions & 5 deletions codegen/compiler/src/Quidditch/Target/QuidditchTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,6 @@ class QuidditchTargetBackend final : public IREE::HAL::TargetBackend {
StringAttr::get(context, "riscv32-unknown-elf"));
list.append("compute_cores",
IntegerAttr::get(IntegerType::get(context, 32), 8));
list.append("bank_count",
IntegerAttr::get(IntegerType::get(context, 32), 32));
list.append("bank_byte_size",
IntegerAttr::get(IntegerType::get(context, 32), 4));
executableTargetAttrs.push_back(IREE::HAL::ExecutableTargetAttr::get(
context, StringAttr::get(context, "quidditch"),
StringAttr::get(context, "static"), list.getDictionary(context)));
Expand Down Expand Up @@ -232,7 +228,6 @@ class QuidditchTargetBackend final : public IREE::HAL::TargetBackend {
addIREEPostBufferizationPasses(modulePassManager.nest<func::FuncOp>());

FunctionLikeNest(modulePassManager)
.addPass(quidditch::createAvoidBankConflictsPass)
.addPass(quidditch::Snitch::createLowerPipelineOpPass)
.addPass(quidditch::Snitch::createLowerForallOpPass)
.addPass(createSCFForLoopCanonicalizationPass)
Expand Down
8 changes: 1 addition & 7 deletions runtime/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,8 @@ include(CTest)

find_package(Python3 COMPONENTS Interpreter REQUIRED)

add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/test.o
COMMAND ${QUIDDITCH_TOOLCHAIN_ROOT}/bin/pulp-as
${CMAKE_CURRENT_LIST_DIR}/test.s --filetype=obj --target-abi=ilp32d
--mcpu=snitch -g -o ${CMAKE_CURRENT_BINARY_DIR}/test.o
DEPENDS test.s ${QUIDDITCH_TOOLCHAIN_ROOT}/bin/pulp-as)

# Sanity check that our toolchain, emulator etc. work
add_executable(HelloWorld main.c ${CMAKE_CURRENT_BINARY_DIR}/test.o)
add_executable(HelloWorld main.c)
target_link_libraries(HelloWorld snRuntime)

macro(test_executable target_name)
Expand Down
142 changes: 2 additions & 140 deletions runtime/tests/main.c
Original file line number Diff line number Diff line change
@@ -1,147 +1,9 @@
#include <memory_decls.h>
#include <riscv_decls.h>
#include <ssr_decls.h>
#include <stdio.h>
#include <sync_decls.h>
#include <team_decls.h>

#include "/home/markus/CLionProjects/Quidditch/runtime/cmake-build-release/snitch_cluster/cluster_gen/snitch_cluster_addrmap.h"
#include "/home/markus/CLionProjects/Quidditch/runtime/cmake-build-release/snitch_cluster/cluster_gen/snitch_cluster_peripheral.h"

#define CLUSTER_PERF_COUNTER_ADDR \
(CLUSTER_PERIPH_BASE_ADDR + \
SNITCH_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_REG_OFFSET)

// Returns CLUSTER_PERF_COUNTER_ADDR, i.e. the cluster peripheral base address
// plus the offset of the PERF_COUNTER_ENABLE_0 register.
inline uint32_t __attribute__((const)) snrt_cluster_perf_counters_addr() {
  const uint32_t perf_counter_base = CLUSTER_PERF_COUNTER_ADDR;
  return perf_counter_base;
}

#include "/home/markus/CLionProjects/Quidditch/snitch_cluster/sw/snRuntime/src/perf_cnt.h"

const unsigned REDUCTION_SIZE = 100;
const unsigned ROWS = 5;
const enum snrt_perf_cnt_type METRIC = SNRT_PERF_CNT_TCDM_CONGESTED;

// Prototypes of the kernels exercised by this test. No definitions are visible
// in this file.
// Each takes an input vector, a pointer to rows of weights and an output
// buffer.

// memref<1x161xf64>, memref<5x161xf64>, memref<1x5xf64>
// NOTE(review): the shape comment above says 161 columns, but the row type of
// `weights` is declared with REDUCTION_SIZE (100) -- confirm which is intended.
void kernel161(double vector[], double (*weights)[REDUCTION_SIZE],
double output[]);

// memref<1x100xf64>, memref<5x100xf64>, memref<1x5xf64>
// In NsNet2 148590 conflicts across 122 streaming regions
// (1 fill, 120 core computation and 1 elementwise post-processing).
// Roughly 1217 conflicts per region with one core performing 500 fmadds and
// 600 memory accesses using the streams.
// Measured here: 2162.
void kernel100(double vector[], double (*weights)[REDUCTION_SIZE],
double output[]);

// NOTE(review): presumably the variant of kernel100 that expects a row stride
// of 112 elements; it is declared but never called in the visible code --
// confirm.
void kernel100_stride112(double vector[], double (*weights)[REDUCTION_SIZE],
double output[]);

// Rounds `value` up to the nearest multiple of `multiple`.
//
// Returns `value` unchanged when it already is a multiple, or when `multiple`
// is 0 (no alignment requested). The arithmetic is unsigned, so a result that
// does not fit in `unsigned` wraps around.
unsigned alignTo(unsigned value, unsigned multiple) {
  // A zero multiple would make the modulo below undefined behaviour.
  if (multiple == 0) return value;

  const unsigned rem = value % multiple;
  if (rem == 0) return value;
  return value + (multiple - rem);
}

// Moves `ptr` forward by a multiple of 4 bytes so that the resulting address
// satisfies (address / 4) % 32 == (bank with its lowest bit cleared) % 32.
// The pointer is returned unchanged when it already sits on that bank index
// and `bank` is below 32.
double* padToBank(const double* ptr, unsigned bank) {
  const size_t start = (size_t)ptr;
  const size_t start_bank = (start / 4) % 32;

  // Clear the lowest bit so the requested bank index is even.
  unsigned target_bank = bank & ~1u;
  // The requested bank lies behind the current one: go one full round of
  // 32 banks further instead of moving backwards.
  if (target_bank < start_bank) target_bank += 32;

  return (double*)(start + 4 * (target_bank - start_bank));
}

void streamerSetup3();

// Test entry point. Measures the METRIC performance counter around two runs of
// kernel100 that use differently placed weights, and prints both readings from
// core 0.
// NOTE(review): this text comes from a rendered diff, so the body appears to
// contain lines from both the pre- and post-revert version of the file; see
// the note near the end.
int main() {
// Cores for which snrt_is_dm_core() is true only take part in four hardware
// barriers and then exit.
// NOTE(review): the path below calls snrt_cluster_hw_barrier() directly only
// twice; presumably the remaining barriers are inside callees -- confirm.
if (snrt_is_dm_core()) {
snrt_cluster_hw_barrier();
snrt_cluster_hw_barrier();
snrt_cluster_hw_barrier();
snrt_cluster_hw_barrier();
return 0;
}

// Carve the buffers out of L1: each core gets ROWS output doubles, and the
// shared input vector starts after 8 * ROWS doubles, moved forward to bank 0.
unsigned id = snrt_cluster_core_idx();
double* output = (double*)snrt_l1_start_addr() + id * ROWS;
double* vector = (double*)snrt_l1_start_addr() + 8 * ROWS;
vector = padToBank(vector, /*bank=*/0);
// The weights start after the vector, moved forward to bank 16.
double* vector_end = vector + REDUCTION_SIZE;
vector_end = padToBank(vector_end, 16);
// for (int i = 0; i < REDUCTION_SIZE; i++) {
// vector[i] = 1;
// }

// Contiguous layout between the different tiles.
// Rows are REDUCTION_SIZE rounded up to a multiple of 16 elements apart, and
// each core owns ROWS consecutive rows.
// NOTE(review): `weights` has row length dim1Stride here, but kernel100 is
// declared with rows of REDUCTION_SIZE -- confirm the kernel expects this
// stride.
unsigned dim1Stride = alignTo(REDUCTION_SIZE, 16);
unsigned dim1Index = id * ROWS;
double(*weights)[dim1Stride] =
(double(*)[dim1Stride])(vector_end + dim1Index * dim1Stride);
// for (int j = 0; j < ROWS; j++) {
// for (int i = 0; i < REDUCTION_SIZE; i++) {
// weights[j][i] = id;
// }
// }

// Print, under the mutex, the value of (address / 4) % 32 for the first
// vector elements and the first weight rows.
// NOTE(review): the bank arguments are size_t expressions printed with %d.
volatile uint32_t* pmtx = snrt_mutex();
snrt_mutex_acquire(pmtx);
if (id == 0) {
printf("Vector preload will hit banks %d, %d, %d, %d\n",
(((size_t)vector) / 4) % 32, ((size_t)(vector + 1) / 4) % 32,
((size_t)(vector + 2) / 4) % 32, ((size_t)(vector + 3) / 4) % 32);
}
printf("Weights[%d] preload will hit banks %d, %d, %d, %d\n", id,
(((size_t)weights) / 4) % 32, (((size_t)weights[1]) / 4) % 32,
(((size_t)weights[2]) / 4) % 32, (((size_t)weights[3]) / 4) % 32);
snrt_mutex_release(pmtx);

streamerSetup3();

// First measurement: core 0 resets and starts counter 0 for METRIC.
if (id == 0) {
snrt_reset_perf_counter(SNRT_PERF_CNT0);
snrt_start_perf_counter(SNRT_PERF_CNT0, METRIC, 0);
}

// Sync PC.
// `r` only receives the mcycle value; it is never read afterwards.
uint32_t r;
asm volatile("csrr %0, mcycle" : "=r"(r) : : "memory");
if (id < 8) kernel100(vector, weights, output);

snrt_fpu_fence();
asm volatile("csrr %0, mcycle" : "=r"(r) : : "memory");

if (id == 0) snrt_stop_perf_counter(SNRT_PERF_CNT0);

// Second run: point the weights at a hard-coded address, with rows of exactly
// REDUCTION_SIZE elements.
// NOTE(review): 0x1000a5c0 is a magic constant; its origin is not visible
// here -- confirm.
weights = (double(*)[REDUCTION_SIZE])(((double*)0x1000a5c0) +
id * 5 * REDUCTION_SIZE);

snrt_cluster_hw_barrier();

// Report the counter value of the first run.
if (id == 0) printf("%" PRId32 "\n", snrt_get_perf_counter(SNRT_PERF_CNT0));

// Second measurement, same sequence as the first.
if (id == 0) {
snrt_reset_perf_counter(SNRT_PERF_CNT0);
snrt_start_perf_counter(SNRT_PERF_CNT0, METRIC, 0);
}

// Sync PC.
asm volatile("csrr %0, mcycle" : "=r"(r) : : "memory");
if (id < 8) kernel100(vector, weights, output);

snrt_fpu_fence();
asm volatile("csrr %0, mcycle" : "=r"(r) : : "memory");

if (id == 0) snrt_stop_perf_counter(SNRT_PERF_CNT0);

snrt_cluster_hw_barrier();

// Old: 2118. No bubbles.
// New: 1931, first time, 2118 second. Bubbles at the start of the first time.
if (id == 0) printf("%" PRId32 "\n", snrt_get_perf_counter(SNRT_PERF_CNT0));
// NOTE(review): the remaining statements look like the post-revert body of
// main. Read as one function, the "Hello World" print is unreachable: DM cores
// returned at the top and every other core returns on the next line.
if (!snrt_is_dm_core()) return 0;

printf("Hello World\n");
return 0;
}
Loading

0 comments on commit 978d0ab

Please sign in to comment.