[ GPU ] separate FP16-related functions to support ENABLE_FP16=FALSE
- This commit resolves nnstreamer#2776
- This commit separates FP16-related functions from the _cl.cpp files (a sketch of the resulting guard pattern is shown below).
- A minor bug is fixed.

Signed-off-by: Eunju Yang <[email protected]>
EunjuYang committed Nov 4, 2024
1 parent e86b923 commit a807f70
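
For context, below is a minimal, self-contained sketch of the guard pattern this commit applies: FP16-specific types and functions are wrapped in #ifdef ENABLE_FP16, so a build configured with ENABLE_FP16=FALSE never declares, defines, or references half-precision symbols. The class, function names, and the _Float16 alias are illustrative assumptions for this sketch, not nntrainer code.

// Illustrative sketch only (not the actual nntrainer implementation).
// FP16 paths compile only when ENABLE_FP16 is defined; an ENABLE_FP16=FALSE
// build contains no half-precision symbols at all.
#include <cstdio>
#include <vector>

#ifdef ENABLE_FP16
// Assumption: the toolchain provides the _Float16 extension (GCC/Clang).
using fp16_t = _Float16;
#endif

struct ConcatSketch {
  // FP32 path: always compiled.
  static std::vector<float> concat(const std::vector<float> &a,
                                   const std::vector<float> &b) {
    std::vector<float> out(a);
    out.insert(out.end(), b.begin(), b.end());
    return out;
  }

#ifdef ENABLE_FP16
  // FP16 path: compiled out entirely when ENABLE_FP16 is not defined.
  static std::vector<fp16_t> concat_fp16(const std::vector<fp16_t> &a,
                                         const std::vector<fp16_t> &b) {
    std::vector<fp16_t> out(a);
    out.insert(out.end(), b.begin(), b.end());
    return out;
  }
#endif
};

int main() {
  std::vector<float> a{1.f, 2.f}, b{3.f};
  std::printf("fp32 concat size: %zu\n", ConcatSketch::concat(a, b).size());
#ifdef ENABLE_FP16
  std::vector<fp16_t> ha{fp16_t(1), fp16_t(2)}, hb{fp16_t(3)};
  std::printf("fp16 concat size: %zu\n",
              ConcatSketch::concat_fp16(ha, hb).size());
#endif
  return 0;
}

Compiling this sketch with -DENABLE_FP16 exercises both branches; compiling without it drops the fp16 path entirely, which is the behavior the commit title targets.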
Showing 15 changed files with 901 additions and 759 deletions.
442 changes: 0 additions & 442 deletions nntrainer/layers/cl_layers/concat_cl.cpp

Large diffs are not rendered by default.

13 changes: 9 additions & 4 deletions nntrainer/layers/cl_layers/concat_cl.h
@@ -106,12 +106,15 @@ class ConcatLayerCl : public Layer {

inline static const std::string type = "concat";

-static opencl::Kernel kernel_concat_axis3;
-static opencl::Kernel kernel_concat_axis3_fp16;
-static opencl::Kernel kernel_concat_axis2;
-static opencl::Kernel kernel_concat_axis2_fp16;
static opencl::Kernel kernel_concat_axis1;
+static opencl::Kernel kernel_concat_axis2;
+static opencl::Kernel kernel_concat_axis3;
+
+#ifdef ENABLE_FP16
static opencl::Kernel kernel_concat_axis1_fp16;
+static opencl::Kernel kernel_concat_axis2_fp16;
+static opencl::Kernel kernel_concat_axis3_fp16;
+#endif

/**
* @brief Process data and dimensions for concat
@@ -174,6 +177,7 @@ class ConcatLayerCl : public Layer {
unsigned int input2_channels);

#ifdef ENABLE_FP16

/**
* @brief concat computation for axis 3 fp16
* @param[in] matAdata fp16 * for Input Tensor A
@@ -231,6 +235,7 @@ class ConcatLayerCl : public Layer {
unsigned int input1_channels,
unsigned int input2_channelst);
#endif

private:
std::tuple<props::ConcatDimension> concat_props;
};
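
The header above only declares the FP16 entry points under ENABLE_FP16; the matching call sites need the same guard. Below is a hedged, self-contained illustration of that dispatch pattern (generic names, not the actual ConcatLayerCl code): the FP16 branch disappears from the binary when the flag is off, and half-precision requests are rejected at run time instead.

// Illustrative dispatcher (not nntrainer code): selects the kernel by data
// type; the FP16 branch exists only when ENABLE_FP16 is defined.
#include <cstdio>
#include <stdexcept>

enum class DataType { FP32, FP16 };

void concat_axis3_fp32() { std::puts("running fp32 concat kernel"); }
#ifdef ENABLE_FP16
void concat_axis3_fp16() { std::puts("running fp16 concat kernel"); }
#endif

void concat_axis3(DataType dtype) {
  if (dtype == DataType::FP32) {
    concat_axis3_fp32();
#ifdef ENABLE_FP16
  } else if (dtype == DataType::FP16) {
    concat_axis3_fp16();
#endif
  } else {
    // Reached for FP16 inputs when the build disables half precision.
    throw std::invalid_argument("data type not supported in this build");
  }
}

int main() {
  concat_axis3(DataType::FP32);
  return 0;
}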
