compare bangSoftmax and cnnlSoftmax #95

Open · wants to merge 2 commits into base: master
3 changes: 3 additions & 0 deletions src/04kernel/CMakeLists.txt
@@ -7,6 +7,9 @@ if(USE_CUDA)
file(GLOB_RECURSE KERNEL_CUDA_SRC src/*.cu)
add_subdirectory(cuda)
endif()
if(USE_BANG)
file(GLOB_RECURSE KERNEL_BANG_SRC src/*.mlu)
endif()

add_library(kernel STATIC ${KERNEL_SRC} ${KERNEL_CUDA_SRC})
target_link_libraries(kernel PUBLIC runtime)
11 changes: 11 additions & 0 deletions src/04kernel/src/collectors/softmax.cc
@@ -1,4 +1,6 @@
#include "kernel/collectors/softmax.h"
#include "../kernels/softmax/bang_kernel.hh"
#include "../kernels/softmax/cnnl_kernel.hh"
#include "../kernels/softmax/cpu_kernel.hh"
#include "../kernels/softmax/cuda_kernel.hh"
#include "../kernels/softmax/cudnn_kernel.hh"
@@ -28,6 +30,15 @@ namespace refactor::kernel {
}
break;
}
case decltype(_target)::Mlu: {
if (auto ptr = SoftmaxCnnl::build(cnnl::SoftmaxAlgo::ACCURATE, info); ptr) {
ans.emplace_back(std::move(ptr));
}
if (auto ptr = SoftmaxBang::build(info); ptr) {
ans.emplace_back(std::move(ptr));
}
break;
}
default:
UNREACHABLEX(void, "Unknown target");
}
29 changes: 29 additions & 0 deletions src/04kernel/src/kernels/softmax/bang_kernel.cc
@@ -0,0 +1,29 @@
#include "bang_kernel.hh"

namespace refactor::kernel {
using K = SoftmaxBang;

K::SoftmaxBang(SoftmaxInfo info_) noexcept
: Kernel(), info(std::move(info_)) {}

auto K::build(SoftmaxInfo info) noexcept -> KernelBox {
#ifndef USE_BANG
return nullptr;
#endif

return info.type.isFloat()
? std::make_unique<K>(std::move(info))
: nullptr;
}

auto K::typeId() noexcept -> size_t {
static uint8_t ID = 1;
return reinterpret_cast<size_t>(&ID);
}

auto K::kernelTypeId() const noexcept -> size_t { return typeId(); }
auto K::description() const noexcept -> std::string_view {
return "Performing Softmax using BANG";
}

}// namespace refactor::kernel
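
A note on the typeId() idiom above: taking the address of a function-local static yields a value that is unique to that class for the lifetime of the process, so it serves as a cheap runtime type tag without RTTI. A minimal standalone sketch of the idea (hypothetical kernel names, not part of this PR):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Each class owns its own function-local static; the static's address is
// a process-unique value usable as an identifier.
struct KernelA {
    static size_t typeId() noexcept {
        static uint8_t ID = 1;
        return reinterpret_cast<size_t>(&ID);
    }
};
struct KernelB {
    static size_t typeId() noexcept {
        static uint8_t ID = 1;
        return reinterpret_cast<size_t>(&ID);
    }
};

int main() {
    // Same class: same id. Different classes: different ids.
    std::printf("%d %d\n",
                KernelA::typeId() == KernelA::typeId(),  // prints 1
                KernelA::typeId() == KernelB::typeId()); // prints 0
    return 0;
}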
26 changes: 26 additions & 0 deletions src/04kernel/src/kernels/softmax/bang_kernel.hh
@@ -0,0 +1,26 @@
#ifndef KERNEL_SOFTMAX_BANG_HH
#define KERNEL_SOFTMAX_BANG_HH

#include "cnnl.h"
#include "cnrt.h"
#include "kernel/attributes/softmax_info.h"
#include "kernel/collectors/softmax.h"
namespace refactor::kernel {

struct SoftmaxBang final : public Kernel {
SoftmaxInfo info;

SoftmaxBang(SoftmaxInfo) noexcept;
static KernelBox build(SoftmaxInfo) noexcept;
static size_t typeId() noexcept;

size_t kernelTypeId() const noexcept final;
std::string_view description() const noexcept final;
#ifdef USE_BANG
RoutineWorkspace lower(Resources &) const noexcept final;
#endif
};

}// namespace refactor::kernel

#endif//KERNEL_SOFTMAX_BANG_HH
599 changes: 599 additions & 0 deletions src/04kernel/src/kernels/softmax/bang_kernel.mlu

Large diffs are not rendered by default.
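
For orientation while the .mlu source is collapsed: the kernel computes softmax over the (pre, mid, post) view used throughout this PR, where mid spans the softmax axis and pre/post fold the leading and trailing dimensions. The CPU reference below is illustrative only; it is not the contents of bang_kernel.mlu, which presumably distributes the same work across MLU cores.

#include <cmath>
#include <vector>

// Reference softmax over a (pre, mid, post) layout: reductions run along
// the `mid` axis for every (pre, post) pair.
void softmaxRef(std::vector<float> const &x, std::vector<float> &y,
                int pre, int mid, int post) {
    for (int i = 0; i < pre; ++i)
        for (int k = 0; k < post; ++k) {
            auto at = [&](int j) { return (i * mid + j) * post + k; };
            float max = x[at(0)];// 1) running max for numerical stability
            for (int j = 1; j < mid; ++j) max = std::fmax(max, x[at(j)]);
            float sum = 0;// 2) exponentiate and accumulate
            for (int j = 0; j < mid; ++j) sum += y[at(j)] = std::exp(x[at(j)] - max);
            for (int j = 0; j < mid; ++j) y[at(j)] /= sum;// 3) normalize
        }
}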

88 changes: 88 additions & 0 deletions src/04kernel/src/kernels/softmax/cnnl_kernel.cc
@@ -0,0 +1,88 @@
#include "cnnl_kernel.hh"

#ifdef USE_BANG
#include "../../utilities/bang/cnnl_context.hh"
#include "../../utilities/bang/cnnl_functions.h"
#endif

namespace refactor::kernel {
using K = SoftmaxCnnl;

K::SoftmaxCnnl(cnnl::SoftmaxAlgo algo_, DataType type_,
int pre_, int mid_, int post_) noexcept
: Kernel(), algo(algo_), dataType(type_),
pre(pre_), mid(mid_), post(post_) {}

auto K::build(cnnl::SoftmaxAlgo algo, SoftmaxInfo info) noexcept -> KernelBox {
#ifndef USE_BANG
return nullptr;
#endif

return std::make_unique<K>(algo, info.type, info.pre, info.mid, info.post);
}
auto K::typeId() noexcept -> size_t {
static uint8_t ID = 1;
return reinterpret_cast<size_t>(&ID);
}

auto K::kernelTypeId() const noexcept -> size_t {
return typeId();
}
auto K::description() const noexcept -> std::string_view {
return "Performing softmax forward with CNNL";
}

#ifdef USE_BANG

auto SoftmaxCnnl::lower(Resources &res) const -> RoutineWorkspace {
using namespace cnnl;
using namespace runtime;

// RAII for closure
struct Descriptors {
cnnlTensorDescriptor_t t;
cnnlSoftmaxAlgorithm_t algo;
bool f32;

Descriptors(decltype(algo) algo_, decltype(f32) f32_)
: algo(algo_), f32(f32_) {
CNNL_ASSERT(cnnlCreateTensorDescriptor(&t));
}
~Descriptors() noexcept(false) {
CNNL_ASSERT(cnnlDestroyTensorDescriptor(t));
}
Descriptors(const Descriptors &) = delete;
Descriptors(Descriptors &&) = delete;
};

auto d = std::make_shared<Descriptors>(
static_cast<cnnlSoftmaxAlgorithm_t>(algo),
dataType != DataType::F64);
int dims[]{pre, mid, post};
// cnnlSoftmaxMode_t mode = (pre == 1) ? CNNL_SOFTMAX_MODE_HIGH_DIMENSION
// : (post == 1) ? CNNL_SOFTMAX_MODE_LOW_DIMENSION
// : CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION;
// FIXME(bolun): CNNL Softmax mode
cnnlSoftmaxMode_t mode = CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION;

// cnnlSoftmaxForward_v2 is applied to a 3D input tensor only
CNNL_ASSERT(cnnlSetTensorDescriptor(d->t, CNNL_LAYOUT_ARRAY, cnnlDataTypeConvert(dataType), 3, dims));

res.fetchOrStore<CnnlContext>();
return [d = std::move(d), mode](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
// build alpha/beta for double
auto a = d->f32 ? factor<fp32_t>(1) : factor<fp64_t>(1),
b = d->f32 ? factor<fp32_t>(0) : factor<fp64_t>(0);
CNNL_ASSERT(cnnlSoftmaxForward_v2(
res.fetchOrStore<CnnlContext>()->handle,
d->algo,
mode,
CNNL_COMPUTATION_ULTRAHIGH_PRECISION,
&a, d->t, inputs[0],
&b, d->t, outputs[0]));
};
}

#endif

}// namespace refactor::kernel
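
On the alpha/beta handling in the lambda above: a and b are built with factor<fp32_t> or factor<fp64_t> (defined in cnnl_functions.h further down), which packs the scalar's bit pattern into a uint64_t so one variable can back either width when its address is passed to the library; per the inline comment, the wider form is only needed for F64 tensors. A minimal sketch of that bit-packing idea, illustrative only and written with memcpy rather than the union used in the header:

#include <cstdint>
#include <cstring>
#include <type_traits>

// Store a float or double bit pattern in the low-order bytes of a uint64_t.
// Taking the address of the result and reading it back as the original
// scalar type works on little-endian hosts, where those are the first bytes.
template<class T>
uint64_t factorSketch(T x) {
    static_assert(std::is_floating_point_v<T> && sizeof(T) <= sizeof(uint64_t));
    uint64_t bits = 0;
    std::memcpy(&bits, &x, sizeof(T));
    return bits;
}

// Mirrors the usage in the lambda: pick the width that matches the tensor.
// uint64_t a = isF32 ? factorSketch(1.0f) : factorSketch(1.0);
// uint64_t b = isF32 ? factorSketch(0.0f) : factorSketch(0.0);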
36 changes: 36 additions & 0 deletions src/04kernel/src/kernels/softmax/cnnl_kernel.hh
@@ -0,0 +1,36 @@
#ifndef KERNEL_SOFTMAX_CNNL_HH
#define KERNEL_SOFTMAX_CNNL_HH

#include "kernel/attributes/softmax_info.h"
#include "kernel/collectors/softmax.h"

namespace refactor::kernel {

namespace cnnl {
enum class SoftmaxAlgo {
FAST = 0,
ACCURATE = 1,
LOG = 2,
};
}// namespace cnnl

struct SoftmaxCnnl final : public Kernel {
cnnl::SoftmaxAlgo algo;
DataType dataType;
int pre, mid, post;

SoftmaxCnnl(cnnl::SoftmaxAlgo, DataType, int, int, int) noexcept;

static KernelBox build(cnnl::SoftmaxAlgo, SoftmaxInfo) noexcept;
static size_t typeId() noexcept;

size_t kernelTypeId() const noexcept final;
std::string_view description() const noexcept final;
#ifdef USE_BANG
RoutineWorkspace lower(Resources &) const final;
#endif
};

}// namespace refactor::kernel

#endif// KERNEL_SOFTMAX_CNNL_HH
45 changes: 45 additions & 0 deletions src/04kernel/src/utilities/bang/cnnl_context.cc
@@ -0,0 +1,45 @@
#ifdef USE_BANG

#include "cnnl_context.hh"
#include "cnnl_functions.h"

namespace refactor::kernel::cnnl {

CnnlContext::CnnlContext() : runtime::Resource() {
BANG_ASSERT(cnrtQueueCreate(&queue));
CNNL_ASSERT(cnnlCreate(&handle));
CNNL_ASSERT(cnnlSetQueue(handle, queue));
}
CnnlContext::~CnnlContext() {
BANG_ASSERT(cnrtQueueDestroy(queue));
CNNL_ASSERT(cnnlDestroy(handle));
}

auto CnnlContext::typeId() noexcept -> size_t {
static uint8_t ID = 1;
return reinterpret_cast<size_t>(&ID);
}
auto CnnlContext::build() -> runtime::ResourceBox {
return std::make_unique<CnnlContext>();
}

auto CnnlContext::resourceTypeId() const noexcept -> size_t {
return typeId();
}
auto CnnlContext::description() const noexcept -> std::string_view {
return "CnnlContext";
}

void CnnlContext::copyFromCPU(void *dst, const void *src, size_t size) {
BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), size,
CNRT_MEM_TRANS_DIR_HOST2DEV));
}

void CnnlContext::queueSync() {
BANG_ASSERT(cnrtQueueSync(queue));
}

}// namespace refactor::kernel::cnnl

#endif
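
CnnlContext bundles the cnrtQueue_t and cnnlHandle_t that the softmax routine obtains through res.fetchOrStore<CnnlContext>(), so one queue/handle pair is created lazily per Resources instance and then reused. The real Resources API is not shown in this diff; the following is only a rough, hypothetical sketch of what a fetch-or-create registry keyed by the static typeId trick can look like:

#include <cstddef>
#include <map>
#include <memory>

// Hypothetical, simplified resource registry; illustrates fetchOrStore
// semantics only and is not the project's actual runtime::Resources.
struct Resource {
    virtual ~Resource() = default;
};

struct ResourcesSketch {
    std::map<size_t, std::unique_ptr<Resource>> pool;

    template<class R>
    R *fetchOrStore() {
        auto &slot = pool[R::typeId()];          // keyed by the per-class static id
        if (!slot) slot = std::make_unique<R>(); // created on first request only
        return static_cast<R *>(slot.get());
    }
};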

32 changes: 32 additions & 0 deletions src/04kernel/src/utilities/bang/cnnl_context.hh
@@ -0,0 +1,32 @@
#ifndef KERNEL_CNNL_CONTEXT_HH
#define KERNEL_CNNL_CONTEXT_HH

#include "runtime/resource.h"
#include <cnnl.h>
#include <cnrt.h>

namespace refactor::kernel::cnnl {

struct CnnlContext final : public runtime::Resource {
cnnlHandle_t handle;
cnrtQueue_t queue;

CnnlContext();
~CnnlContext();
CnnlContext(CnnlContext const &) noexcept = delete;
CnnlContext(CnnlContext &&) noexcept = delete;

static size_t typeId() noexcept;
static runtime::ResourceBox build();

size_t resourceTypeId() const noexcept final;
std::string_view description() const noexcept final;

void copyFromCPU(void *dst, const void *src, size_t size);
void queueSync();
};

}// namespace refactor::kernel::cnnl

#endif// KERNEL_CNNL_CONTEXT_HH

39 changes: 39 additions & 0 deletions src/04kernel/src/utilities/bang/cnnl_functions.cpp
@@ -0,0 +1,39 @@
#ifdef USE_BANG

#include "cnnl_functions.h"

namespace refactor::kernel::cnnl {

cnnlDataType_t cnnlDataTypeConvert(DataType dataType) {
// clang-format off
switch (dataType) {
case DataType::F32 : return CNNL_DTYPE_FLOAT; break;
case DataType::F64 : return CNNL_DTYPE_DOUBLE; break;
case DataType::FP16: return CNNL_DTYPE_HALF; break;
case DataType::I8 : return CNNL_DTYPE_INT8; break;
case DataType::I32 : return CNNL_DTYPE_INT32; break;
case DataType::U8 : return CNNL_DTYPE_UINT8; break;
case DataType::BF16: return CNNL_DTYPE_BFLOAT16; break;
case DataType::I64 : return CNNL_DTYPE_INT64; break;
case DataType::Bool: return CNNL_DTYPE_BOOL; break;
default: UNREACHABLE();
}
// clang-format on
}

void setCnnlTensor(cnnlTensorDescriptor_t t, DataType dt, slice_t<int> d) {
auto dt_ = cnnlDataTypeConvert(dt);
if (auto n = d.size(); n == 4) {
CNNL_ASSERT(cnnlSetTensorDescriptor(t, CNNL_LAYOUT_NCHW, dt_, d.size(), d.begin()));
} else if (n < 4) {
int d_[]{1, 1, 1, 1};
std::copy_n(d.begin(), n, d_ + 4 - n);
CNNL_ASSERT(cnnlSetTensorDescriptor(t, CNNL_LAYOUT_NCHW, dt_, 4, std::move(d_)));
} else {
CNNL_ASSERT(cnnlSetTensorDescriptor(t, CNNL_LAYOUT_NCHW, dt_, d.size(), d.begin()));
}
}
}// namespace refactor::kernel::cnnl

#endif
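
The n < 4 branch of setCnnlTensor right-aligns the given dimensions into a 4-D NCHW descriptor, filling the leading positions with 1. A self-contained example of that padding logic (same copy as in the function above):

#include <algorithm>
#include <cstdio>

int main() {
    // A 2-D shape {3, 224} right-aligned into four NCHW dims, matching
    // std::copy_n(d.begin(), n, d_ + 4 - n) with n == 2.
    int d[]{3, 224};
    int n = 2;
    int d_[]{1, 1, 1, 1};
    std::copy_n(d, n, d_ + 4 - n);
    std::printf("%d %d %d %d\n", d_[0], d_[1], d_[2], d_[3]);// prints: 1 1 3 224
    return 0;
}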

41 changes: 41 additions & 0 deletions src/04kernel/src/utilities/bang/cnnl_functions.h
@@ -0,0 +1,41 @@
#ifndef KERNEL_CNNL_FUNCTIONS_H
#define KERNEL_CNNL_FUNCTIONS_H

#include "common.h"
#include <cnnl.h>

#define BANG_ASSERT(STATUS) \
if (auto status = (STATUS); status != CNRT_RET_SUCCESS) { \
RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \
cnrtGetErrorStr(status), (int) status)); \
}

#define CNNL_ASSERT(STATUS) \
if (auto status = (STATUS); status != CNNL_STATUS_SUCCESS) { \
fmt::println("cnnl failed on \"" #STATUS "\" with {}", \
cnnlGetErrorString(status)); \
abort(); \
}

namespace refactor::kernel::cnnl {

cnnlDataType_t cnnlDataTypeConvert(DataType);

// A helper function that sets a CNNL tensor descriptor given the tensor shape and data type
void setCnnlTensor(cnnlTensorDescriptor_t, DataType, slice_t<int>);

template<class T>
constexpr uint64_t factor(T x) noexcept {
static_assert(std::is_floating_point_v<T>);
static_assert(sizeof(T) <= sizeof(uint64_t));
union {
T f;
uint64_t i;
} u{x};
return u.i;
}

}// namespace refactor::kernel::cnnl

#endif// KERNEL_CNNL_FUNCTIONS_H
