Skip to content

Commit

Permalink
Merge branch 'dev' into dev-hardware
Browse files Browse the repository at this point in the history
  • Loading branch information
kilinchange committed Jan 15, 2024
2 parents 31478cc + 54c2f7e commit 2f2ad5d
Show file tree
Hide file tree
Showing 33 changed files with 403 additions and 196 deletions.
10 changes: 4 additions & 6 deletions src/02hardware/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,15 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(hardware VERSION 0.0.0 LANGUAGES CXX)
message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})

# Source files
file(GLOB_RECURSE HARDWARE_SRC src/*.cc src/*.cpp)
add_library(hardware STATIC ${HARDWARE_SRC})
target_link_libraries(hardware PUBLIC common)
target_include_directories(hardware PUBLIC include)

if(USE_CUDA)
file(GLOB_RECURSE HARDWARE_CUDA_SRC src/devices/nvidia/*.cu)
target_include_directories(hardware PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
endif()

add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC} ${HARDWARE_BANG_SRC})
target_link_libraries(hardware PUBLIC common)
target_include_directories(hardware PUBLIC include)

file(GLOB_RECURSE HARDWARE_TEST test/*.cpp)
if(HARDWARE_TEST)
add_executable(hardware_test ${HARDWARE_TEST})
Expand Down
2 changes: 1 addition & 1 deletion src/02hardware/include/hardware/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ namespace refactor::hardware {

virtual ~Device() = default;
virtual Type type() const noexcept = 0;
virtual void setContext() const noexcept;
virtual void setContext() const;

Arc<Blob> malloc(size_t);
Arc<Blob> absorb(Arc<Blob> &&);
Expand Down
2 changes: 1 addition & 1 deletion src/02hardware/include/hardware/devices/nvidia.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace refactor::hardware {
class Nvidia final : public Device {
public:
explicit Nvidia(int32_t card);
void setContext() const noexcept final;
void setContext() const final;
Type type() const noexcept final {
return Type::Nvidia;
}
Expand Down
2 changes: 1 addition & 1 deletion src/02hardware/src/device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ namespace refactor::hardware {
Device::Device(decltype(_card) card, decltype(_mem) mem)
: _card(card), _mem(std::move(mem)) {}

void Device::setContext() const noexcept {}
void Device::setContext() const {}
// Constructs a new Blob for this device with the requested size and returns it wrapped in an Arc.
auto Device::malloc(size_t size) -> Arc<Blob> {
    Arc<Blob> blob(new Blob(this, size));
    return blob;
}
Expand Down
25 changes: 20 additions & 5 deletions src/02hardware/src/devices/mlu/device.cc
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
#include "functions.hh"
#include "hardware/devices/mlu.h"
#include "hardware/mem_pool.h"

#ifdef USE_BANG
#include "cnrt.h"
#include "memory.hh"

// Evaluates a CNRT call and, when the result is not CNRT_RET_SUCCESS, reports it through
// RUNTIME_ERROR with the failing expression, cnrtGetErrorStr's text and the numeric code.
// The do { ... } while (0) wrapper makes `BANG_ASSERT(x);` expand to exactly one statement,
// so the macro stays correct inside an unbraced if/else.
#define BANG_ASSERT(STATUS)                                                              \
    do {                                                                                 \
        if (auto status = (STATUS); status != CNRT_RET_SUCCESS) {                        \
            RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \
                                      cnrtGetErrorStr(status), static_cast<int>(status))); \
        }                                                                                \
    } while (0)

#endif
namespace refactor::hardware {

static Arc<Memory> bangMemory(int32_t card) {
#ifdef USE_BANG
ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
setDevice(card);
auto [free, total] = getMemInfo();
unsigned deviceCount;
BANG_ASSERT(cnrtGetDeviceCount(&deviceCount));
ASSERT(0 <= card && card < deviceCount, "Invalid card id: {}", card);
BANG_ASSERT(cnrtSetDevice(card));

size_t free, total;
BANG_ASSERT(cnrtMemGetInfo(&free, &total));
auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
fmt::println("initializing Cambricon MLU {}, memory {} / {}, alloc {}",
card, free, total, size);
Expand All @@ -25,7 +38,9 @@ namespace refactor::hardware {
Mlu::Mlu(int32_t card) : Device(card, bangMemory(card)) {}

// Selects card `_card` through cnrtSetDevice when USE_BANG is defined.
// NOTE(review): this override is still noexcept, while BANG_ASSERT reports failures through
// RUNTIME_ERROR. If RUNTIME_ERROR throws, a failing cnrtSetDevice would terminate the process
// here. Device::setContext and Nvidia::setContext drop noexcept in this same commit — confirm
// whether Mlu (and its header) should follow.
void Mlu::setContext() const noexcept {
// NOTE(review): presumably the pre-commit call; functions.hh is deleted by this commit — confirm.
setDevice(_card);
#ifdef USE_BANG
BANG_ASSERT(cnrtSetDevice(_card));
#endif
}

}// namespace refactor::hardware
21 changes: 0 additions & 21 deletions src/02hardware/src/devices/mlu/functions.cc

This file was deleted.

28 changes: 0 additions & 28 deletions src/02hardware/src/devices/mlu/functions.hh

This file was deleted.

15 changes: 12 additions & 3 deletions src/02hardware/src/devices/mlu/memory.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
#ifdef USE_BANG

#include "memory.hh"
#include "functions.hh"
#include "cnrt.h"
#include "common.h"

// Evaluates a CNRT call and, when the result is not CNRT_RET_SUCCESS, reports it through
// RUNTIME_ERROR with the failing expression, cnrtGetErrorStr's text and the numeric code.
// The do { ... } while (0) wrapper makes `BANG_ASSERT(x);` expand to exactly one statement,
// so the macro stays correct inside an unbraced if/else.
#define BANG_ASSERT(STATUS)                                                              \
    do {                                                                                 \
        if (auto status = (STATUS); status != CNRT_RET_SUCCESS) {                        \
            RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \
                                      cnrtGetErrorStr(status), static_cast<int>(status))); \
        }                                                                                \
    } while (0)

namespace refactor::hardware {
#ifdef USE_BANG

using M = MluMemory;

void *M::malloc(size_t size) {
Expand All @@ -28,6 +37,6 @@ namespace refactor::hardware {
CNRT_MEM_TRANS_DIR_PEER2PEER));
return dst;
}
#endif

}// namespace refactor::hardware
#endif
40 changes: 29 additions & 11 deletions src/02hardware/src/devices/nvidia/device.cc
Original file line number Diff line number Diff line change
@@ -1,31 +1,49 @@
#include "functions.cuh"
#include "hardware/devices/nvidia.h"
#include "hardware/devices/nvidia.h"
#include "hardware/mem_pool.h"
#include "memory.cuh"

#ifdef USE_CUDA
#include "memory.hh"
#include <cuda_runtime.h>

// Evaluates a CUDA runtime call and, when the result is not cudaSuccess, reports it through
// RUNTIME_ERROR with the failing expression, cudaGetErrorString's text and the numeric code.
// The do { ... } while (0) wrapper makes `CUDA_ASSERT(x);` expand to exactly one statement,
// so the macro stays correct inside an unbraced if/else.
#define CUDA_ASSERT(STATUS)                                                              \
    do {                                                                                 \
        if (auto status = (STATUS); status != cudaSuccess) {                             \
            RUNTIME_ERROR(fmt::format("cuda failed on \"" #STATUS "\" with \"{}\" ({})", \
                                      cudaGetErrorString(status), static_cast<int>(status))); \
        }                                                                                \
    } while (0)
#endif

namespace refactor::hardware {

// Creates the memory pool that backs Nvidia device `card`.
// With USE_CUDA defined: checks `card` against the device count, selects the device, reads
// its free/total memory and builds a MemPool over NvidiaMemory sized
// min(free, max(5ul << 30, total * 4 / 5)). Without USE_CUDA it returns nullptr.
// NOTE(review): this hunk shows both sides of the commit. The getDeviceCount()/setDevice()/
// getMemInfo() calls, the shorter println and the `256ul` alignment look like the removed
// pre-commit lines (functions.cuh is deleted by this commit) — confirm against the repository.
static Arc<Memory> cudaMemory(int32_t card) {
#ifdef USE_CUDA
ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
setDevice(card);
auto [free, total] = getMemInfo();
int deviceCount;
CUDA_ASSERT(cudaGetDeviceCount(&deviceCount));
ASSERT(0 <= card && card < deviceCount, "Invalid card id: {}", card);
CUDA_ASSERT(cudaSetDevice(card));

size_t free, total;
CUDA_ASSERT(cudaMemGetInfo(&free, &total));
auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}",
card, free, total, size);
cudaDeviceProp prop;
// Query the card being initialised rather than device 0, so the pool alignment comes from
// the same device the pool is created for.
CUDA_ASSERT(cudaGetDeviceProperties(&prop, card));
size_t alignment = prop.textureAlignment;
fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}, alignment {}",
card, free, total, size, alignment);
return std::make_shared<MemPool>(
std::make_shared<NvidiaMemory>(),
size,
256ul);
alignment);
#else
return nullptr;
#endif
}

Nvidia::Nvidia(int32_t card) : Device(card, cudaMemory(card)) {}

void Nvidia::setContext() const noexcept {
setDevice(_card);
void Nvidia::setContext() const {
#ifdef USE_CUDA
CUDA_ASSERT(cudaSetDevice(_card));
#endif
}

}// namespace refactor::hardware
19 changes: 0 additions & 19 deletions src/02hardware/src/devices/nvidia/functions.cu

This file was deleted.

24 changes: 0 additions & 24 deletions src/02hardware/src/devices/nvidia/functions.cuh

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
#include "functions.cuh"
#include "memory.cuh"
#ifdef USE_CUDA

#include "memory.hh"
#include "common.h"
#include <cuda_runtime.h>

// Evaluates a CUDA runtime call and, when the result is not cudaSuccess, reports it through
// RUNTIME_ERROR with the failing expression, cudaGetErrorString's text and the numeric code.
// The do { ... } while (0) wrapper makes `CUDA_ASSERT(x);` expand to exactly one statement,
// so the macro stays correct inside an unbraced if/else.
#define CUDA_ASSERT(STATUS)                                                              \
    do {                                                                                 \
        if (auto status = (STATUS); status != cudaSuccess) {                             \
            RUNTIME_ERROR(fmt::format("cuda failed on \"" #STATUS "\" with \"{}\" ({})", \
                                      cudaGetErrorString(status), static_cast<int>(status))); \
        }                                                                                \
    } while (0)

namespace refactor::hardware {
using M = NvidiaMemory;
Expand Down Expand Up @@ -29,3 +38,5 @@ namespace refactor::hardware {
}

}// namespace refactor::hardware

#endif
File renamed without changes.
6 changes: 4 additions & 2 deletions src/03runtime/include/runtime/stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ namespace refactor::runtime {
decltype(_device));

decltype(_graph) const &graph() const noexcept { return _graph; }
void setData(count_t, void const *, size_t);
auto setData(count_t, size_t) -> Arc<hardware::Device::Blob>;
void setData(count_t, Arc<hardware::Device::Blob>);
bool getData(count_t, void *, size_t) const;
auto getData(count_t) const -> Arc<hardware::Device::Blob>;
void setData(count_t, void const *, size_t);
bool copyData(count_t, void *, size_t) const;
void run();
auto bench(void (*sync)()) -> std::vector<std::chrono::nanoseconds>;
void trace(std::function<void(count_t, void const *const *, void const *const *)>);
Expand Down
12 changes: 9 additions & 3 deletions src/03runtime/src/stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,21 @@ namespace refactor::runtime {
std::move(edges),
} {}

// Asks the stream's device for a blob of the given size, stores it as the data of edge `i`
// and returns a handle to that same blob.
auto Stream::setData(count_t i, size_t size) -> Arc<hardware::Device::Blob> {
    auto blob = _device->malloc(size);
    _graph.edges[i].blob = blob;
    return blob;
}
// Replaces the data of edge `i` with an already existing blob, moving the caller's handle in.
void Stream::setData(count_t i, Arc<hardware::Device::Blob> blob) {
    auto &edge = _graph.edges[i];
    edge.blob = std::move(blob);
}
// Allocates a fresh blob on the stream's device, fills it from the host buffer `data`
// via copyFromHost(data, size), then binds it to edge `i`.
void Stream::setData(count_t i, void const *data, size_t size) {
    auto staged = _device->malloc(size);
    staged->copyFromHost(data, size);
    _graph.edges[i].blob = std::move(staged);
}
void Stream::setData(count_t i, Arc<hardware::Device::Blob> blob) {
_graph.edges[i].blob = std::move(blob);
auto Stream::getData(count_t i) const -> Arc<hardware::Device::Blob> {
return _graph.edges[i].blob;
}
bool Stream::getData(count_t i, void *data, size_t size) const {
bool Stream::copyData(count_t i, void *data, size_t size) const {
if (!_graph.edges[i].blob) { return false; }
_graph.edges[i].blob->copyToHost(data, size);
return true;
Expand Down
2 changes: 2 additions & 0 deletions src/04kernel/include/kernel/collectors/simple_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ namespace refactor::kernel {
And,
Or,
Xor,
Mod,
Fmod,
};

std::string_view opName(SimpleBinaryType type);
Expand Down
2 changes: 2 additions & 0 deletions src/04kernel/src/collectors/simple_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ namespace refactor::kernel {
CASE(And);
CASE(Or);
CASE(Xor);
CASE(Mod);
CASE(Fmod);
default:
UNREACHABLE();
}
Expand Down
Loading

0 comments on commit 2f2ad5d

Please sign in to comment.