[ GPU ] split kernel registration from forwarding function in addition_layer_cl and transpose_cl #2810

Open · wants to merge 2 commits into base: main
17 changes: 10 additions & 7 deletions nntrainer/cl_context.cpp
@@ -39,9 +39,11 @@ static void add_default_object(ClContext &cc) {
                        ml::train::LayerType::LAYER_FC);
   }
 
-  cc.registerFactory(nntrainer::createLayer<AdditionLayerCL>,
-                     AdditionLayerCL::type,
-                     ml::train::LayerType::LAYER_ADDITION);
+  if (AdditionLayerCL::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer<AdditionLayerCL>,
+                       AdditionLayerCL::type,
+                       ml::train::LayerType::LAYER_ADDITION);
+  }
 
   // @todo swiglulayercl also needs to be updated.
   cc.registerFactory(nntrainer::createLayer<SwiGLULayerCl>, SwiGLULayerCl::type,
@@ -62,10 +64,11 @@ static void add_default_object(ClContext &cc) {
                        ConcatLayerCl::type, ml::train::LayerType::LAYER_CONCAT);
   }
 
-  // @todo transposlayercl also needs to be updated.
-  cc.registerFactory(nntrainer::createLayer<TransposeLayerCl>,
-                     TransposeLayerCl::type,
-                     ml::train::LayerType::LAYER_TRANSPOSE);
+  if (TransposeLayerCl::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer<TransposeLayerCl>,
+                       TransposeLayerCl::type,
+                       ml::train::LayerType::LAYER_TRANSPOSE);
+  }
 }
 
 static void registerer(ClContext &cc) noexcept {
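Both hunks apply the same pattern: the layer's factory is registered only when its kernel-registration hook succeeds, so a layer whose kernels fail to build is never advertised, instead of failing later inside the forwarding function. Below is a minimal, self-contained sketch of that pattern; `Context` and `GpuLayer` are illustrative stand-ins, not nntrainer types.

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <string>

// Stand-in for ClContext: maps a layer type name to a factory callback.
struct Context {
  std::map<std::string, std::function<void()>> factories;
  void registerFactory(const std::string &type, std::function<void()> fn) {
    factories.emplace(type, std::move(fn));
  }
};

// Stand-in layer: kernels must be set up before the layer may be exposed.
struct GpuLayer {
  inline static const std::string type = "gpu_layer";
  static bool registerClKernels() {
    // A real layer would compile/cache its OpenCL programs here and
    // return false if any of them fails to build.
    return true;
  }
};

int main() {
  Context cc;
  // The guarded registration this PR introduces:
  if (GpuLayer::registerClKernels()) {
    cc.registerFactory(GpuLayer::type, [] { /* construct the layer */ });
  }
  std::cout << "registered factories: " << cc.factories.size() << '\n';
}
```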
18 changes: 14 additions & 4 deletions nntrainer/layers/cl_layers/addition_layer_cl.h
@@ -18,19 +18,20 @@
 #include <cl_context.h>
 #include <common_properties.h>
 #include <layer_devel.h>
+#include <layer_impl_cl.h>
 
 namespace nntrainer {
 
 /**
  * @class   AdditionLayerCL
  * @brief   Addition Layer
  */
-class AdditionLayerCL : public Layer {
+class AdditionLayerCL : public LayerImplCl {
 public:
   /**
    * @brief Constructor of Addition Layer
    */
-  AdditionLayerCL() : Layer(), add_props(props::Print()) {}
+  AdditionLayerCL() : LayerImplCl(), add_props(props::Print()) {}
 
   /**
    * @brief Destructor of Addition Layer
@@ -93,10 +94,19 @@ class AdditionLayerCL : public Layer {
    */
   const std::string getType() const override { return AdditionLayerCL::type; };
 
-  std::tuple<props::Print>
-    add_props; /**< fc layer properties : unit - number of output neurons */
+  /**
+   * @brief Register Cl Kernels for `AdditionLayerCl`, bypassing the
+   * registration process since it does not require any specific kernels. This
+   * function simply returns `true` because `AdditionLayerCl` does not rely on
+   * the specific kernels for the layer.
+   */
+  static bool registerClKernels() { return true; };
[Review comment] @myungjoo (Member), Dec 20, 2024:
does it register ClKernels? or does this check if CLKernels are registered or not?
(Clarify at brief)

[Reply] PR author (Contributor):
Missed your previous message—sorry for the delayed response!
This change intends to handle layer-specific CL kernel registrations. However, both the addition and transpose layers don't actually require any specific kernels, so we're skipping the registration step and simply returning true.
I've updated the @brief section accordingly. Please let me know if there's a better approach, and I'd be happy to implement it. Thanks again! 😊

   inline static const std::string type = "addition";
 
+private:
+  std::tuple<props::Print>
+    add_props; /**< fc layer properties : unit - number of output neurons */
 };
 
 } // namespace nntrainer
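The reviewer's question above (does the hook register kernels, or check that they are registered?) is answered by the contract: `registerClKernels()` is the per-layer registration hook, and layers with no custom kernels trivially succeed. For a layer that does ship its own kernels (the `@todo` swiglu and concat cases in cl_context.cpp), the hook would perform the registration and report failure. A hypothetical sketch follows; `compileClKernel()` and the kernel names are assumptions for illustration, not nntrainer API.

```cpp
#include <string>
#include <utility>
#include <vector>

// Hypothetical compile-and-cache helper; nntrainer's real entry point for
// kernel registration may have a different name and signature.
static bool compileClKernel(const std::string & /*source*/,
                            const std::string & /*name*/) {
  return true; // stub: pretend the OpenCL program built successfully
}

// What registerClKernels() could look like for a layer that does need
// device kernels: register each one and fail fast, so the layer factory
// is never exposed when a kernel cannot be built.
static bool registerClKernelsForExampleLayer() {
  static const std::vector<std::pair<std::string, std::string>> kernels = {
    {"example_kernel_fp32", "/* OpenCL C source */"},
    {"example_kernel_fp16", "/* OpenCL C source */"},
  };
  for (const auto &[name, source] : kernels) {
    if (!compileClKernel(source, name))
      return false;
  }
  return true;
}
```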
21 changes: 12 additions & 9 deletions nntrainer/layers/cl_layers/transpose_cl.h
@@ -16,6 +16,7 @@

 #include <common_properties.h>
 #include <layer_devel.h>
+#include <layer_impl_cl.h>
 #include <opencl_buffer.h>
 #include <opencl_kernel.h>

@@ -25,13 +26,13 @@ namespace nntrainer {
  * @brief A tranpose layer.
  *
  */
-class TransposeLayerCl final : public Layer {
+class TransposeLayerCl final : public LayerImplCl {
 public:
   /**
    * @brief Construct a new transpose layer object
    *
    */
-  TransposeLayerCl() : Layer(), transpose_props(props::Print()) {}
+  TransposeLayerCl() : LayerImplCl(), transpose_props(props::Print()) {}
 
   /**
    * @brief Destroy the transpose layer object
@@ -82,15 +83,17 @@ class TransposeLayerCl final : public Layer {
    */
   void setProperty(const std::vector<std::string> &values) override;
 
-  inline static const std::string type = "transpose";
-  static opencl::Kernel kernel_transpose_axis0;
-  static opencl::Kernel kernel_transpose_fp16_axis0;
-  static opencl::Kernel kernel_transpose_axis1;
-  static opencl::Kernel kernel_transpose_fp16_axis1;
-  static opencl::Kernel kernel_transpose_axis2;
-  static opencl::Kernel kernel_transpose_fp16_axis2;
+  /**
+   * @brief Register Cl Kernels for `TransposeLayerCl`, bypassing the
+   * registration process since it does not require any specific kernels. This
+   * function simply returns `true` because `TransposeLayerCl` does not rely on
+   * the specific kernels for the layer.
+   */
+  static bool registerClKernels() { return true; };
+
+  inline static const std::string type = "transpose";
 
 private:
   std::tuple<props::Print> transpose_props; /**< transpose layer properties :
                                             unit - number of output neurons */
 };
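Dropping the six static `opencl::Kernel` members means forwarding code no longer reaches for per-layer statics such as `TransposeLayerCl::kernel_transpose_axis0`. A sketch of the lookup-by-name alternative this enables; the registry type and `getKernel()` helper are illustrative assumptions, not nntrainer's actual mechanism.

```cpp
#include <map>
#include <stdexcept>
#include <string>

// Placeholder for opencl::Kernel in this illustration.
struct Kernel {
  std::string name;
};

// A single shared registry, filled during registerClKernels(), replaces
// one static member per (axis, precision) combination.
static std::map<std::string, Kernel> &kernelRegistry() {
  static std::map<std::string, Kernel> registry;
  return registry;
}

// Forwarding code would fetch a kernel by name at run time:
static Kernel &getKernel(const std::string &name) {
  auto it = kernelRegistry().find(name);
  if (it == kernelRegistry().end())
    throw std::runtime_error("kernel not registered: " + name);
  return it->second;
}
```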