[ GPU ] separate FP16-related functions to support ENABLE_FP16=FALSE
- This commit resolves nnstreamer#2776
- This commit separates FP16-related functions from the _cl.cpp files (a sketch of the resulting guard pattern is shown below).
- A minor bug is fixed.

Signed-off-by: Eunju Yang <[email protected]>
EunjuYang committed Nov 4, 2024
1 parent e86b923 commit a807f70
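
For context, below is a minimal, self-contained sketch of the guard pattern this commit applies: FP16-specific types and functions are wrapped in #ifdef ENABLE_FP16, so a build configured with ENABLE_FP16=FALSE never declares, defines, or references half-precision symbols. The class, function names, and the _Float16 alias are illustrative assumptions for this sketch, not nntrainer code.

// Illustrative sketch only (not the actual nntrainer implementation).
// FP16 paths compile only when ENABLE_FP16 is defined; an ENABLE_FP16=FALSE
// build contains no half-precision symbols at all.
#include <cstdio>
#include <vector>

#ifdef ENABLE_FP16
// Assumption: the toolchain provides the _Float16 extension (GCC/Clang).
using fp16_t = _Float16;
#endif

struct ConcatSketch {
  // FP32 path: always compiled.
  static std::vector<float> concat(const std::vector<float> &a,
                                   const std::vector<float> &b) {
    std::vector<float> out(a);
    out.insert(out.end(), b.begin(), b.end());
    return out;
  }

#ifdef ENABLE_FP16
  // FP16 path: compiled out entirely when ENABLE_FP16 is not defined.
  static std::vector<fp16_t> concat_fp16(const std::vector<fp16_t> &a,
                                         const std::vector<fp16_t> &b) {
    std::vector<fp16_t> out(a);
    out.insert(out.end(), b.begin(), b.end());
    return out;
  }
#endif
};

int main() {
  std::vector<float> a{1.f, 2.f}, b{3.f};
  std::printf("fp32 concat size: %zu\n", ConcatSketch::concat(a, b).size());
#ifdef ENABLE_FP16
  std::vector<fp16_t> ha{fp16_t(1), fp16_t(2)}, hb{fp16_t(3)};
  std::printf("fp16 concat size: %zu\n",
              ConcatSketch::concat_fp16(ha, hb).size());
#endif
  return 0;
}

Compiling this sketch with -DENABLE_FP16 exercises both branches; compiling without it drops the fp16 path entirely, which is the behavior the commit title targets.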
Showing 15 changed files with 901 additions and 759 deletions.
442 changes: 0 additions & 442 deletions nntrainer/layers/cl_layers/concat_cl.cpp

Large diffs are not rendered by default.

13 changes: 9 additions & 4 deletions nntrainer/layers/cl_layers/concat_cl.h
@@ -106,12 +106,15 @@ class ConcatLayerCl : public Layer {

inline static const std::string type = "concat";

-static opencl::Kernel kernel_concat_axis3;
-static opencl::Kernel kernel_concat_axis3_fp16;
-static opencl::Kernel kernel_concat_axis2;
-static opencl::Kernel kernel_concat_axis2_fp16;
static opencl::Kernel kernel_concat_axis1;
+static opencl::Kernel kernel_concat_axis2;
+static opencl::Kernel kernel_concat_axis3;
+
+#ifdef ENABLE_FP16
static opencl::Kernel kernel_concat_axis1_fp16;
+static opencl::Kernel kernel_concat_axis2_fp16;
+static opencl::Kernel kernel_concat_axis3_fp16;
+#endif

/**
* @brief Process data and dimensions for concat
@@ -174,6 +177,7 @@ class ConcatLayerCl : public Layer {
unsigned int input2_channels);

#ifdef ENABLE_FP16

/**
* @brief concat computation for axis 3 fp16
* @param[in] matAdata fp16 * for Input Tensor A
@@ -231,6 +235,7 @@ class ConcatLayerCl : public Layer {
unsigned int input1_channels,
unsigned int input2_channelst);
#endif

private:
std::tuple<props::ConcatDimension> concat_props;
};
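
The header above only declares the FP16 entry points under ENABLE_FP16; the matching call sites need the same guard. Below is a hedged, self-contained illustration of that dispatch pattern (generic names, not the actual ConcatLayerCl code): the FP16 branch disappears from the binary when the flag is off, and half-precision requests are rejected at run time instead.

// Illustrative dispatcher (not nntrainer code): selects the kernel by data
// type; the FP16 branch exists only when ENABLE_FP16 is defined.
#include <cstdio>
#include <stdexcept>

enum class DataType { FP32, FP16 };

void concat_axis3_fp32() { std::puts("running fp32 concat kernel"); }
#ifdef ENABLE_FP16
void concat_axis3_fp16() { std::puts("running fp16 concat kernel"); }
#endif

void concat_axis3(DataType dtype) {
  if (dtype == DataType::FP32) {
    concat_axis3_fp32();
#ifdef ENABLE_FP16
  } else if (dtype == DataType::FP16) {
    concat_axis3_fp16();
#endif
  } else {
    // Reached for FP16 inputs when the build disables half precision.
    throw std::invalid_argument("data type not supported in this build");
  }
}

int main() {
  concat_axis3(DataType::FP32);
  return 0;
}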
