Commit 4758a5f
This PR includes qnn op package 2

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <[email protected]>
1 parent: 3ac9404

Showing 18 changed files with 7,741 additions and 0 deletions.
nntrainer/npu/qnn/LLaMAPackage/src/LLaMAPackageInterface.cpp (400 additions, 0 deletions): large diff not rendered by default.
@@ -0,0 +1,146 @@
//==============================================================================
// Auto Generated Code for LLaMAPackage
//==============================================================================

#include "HTP/core/constraints.h"
#include "HTP/core/op_package_feature_support.h"
#include "HTP/core/op_register_ext.h"
#include "HTP/core/optimize.h"
#include "HTP/core/simple_reg.h"
#include "QnnOpPackage.h"

#define MASK_INFINITY 1e15

BEGIN_PKG_OP_DEFINITION(PKG_CausalMask);

// op execute function declarations
template <typename TensorType>
GraphStatus causalmaskImpl(TensorType &out_0, const TensorType &in_0);

// forward declaration of sample cost function
static float causalmaskCostFunc(const Op *op);

/*
 * method 1 for defining op, using default cost value (i.e. GLACIAL) and default
 * flag (Flags::RESOURCE_HVX) syntax: DEF_PACKAGE_OP(F,OP) e.g.
 * DEF_PACKAGE_OP((causalmaskImpl<Tensor>), "CausalMask")
 */
DEF_PACKAGE_OP((causalmaskImpl<Tensor>), "CausalMask")

/*
 * method 2 for defining op with specified cost value (one of GLACIAL, SNAIL,
 * FAST, FREE) and provided flags syntax:
 * DEF_PACKAGE_OP_AND_COST_AND_FLAGS(F,OP,COST,...) can use zero or more flags,
 * FLAG options are IS_CONST, INHIBIT_CONST_PROP, RESOURCE_HVX, RESOURCE_HMX(not
 * supported in external op packages) e.g.
 * DEF_PACKAGE_OP_AND_COST_AND_FLAGS((causalmaskImpl<PlainFloatTensor>),
 * "CausalMask", SNAIL)
 */

/*
 * method 3 for defining op with cost function pointer and provided flags
 * cost function pointer type: typedef float (*cost_function) (const Op * op);
 * syntax: DEF_PACKAGE_OP_AND_COST_F_AND_FLAGS(F,OP,COST_F,...)
 * e.g. DEF_PACKAGE_OP_AND_COST_F_AND_FLAGS((causalmaskImpl<PlainFloatTensor>),
 * "CausalMask", causalmaskCostFunc, Flags::RESOURCE_HVX)
 */

/*
 * optimization definitions
 * need to be global in the package
 * one definition per optimization
 * syntax:
 * DEF_PACKAGE_OPTIMIZATION(PRIORITY,MATCHCODE,CONSTRAINTCODE,REPLACECODE)
 * PRIORITY predefined values include EARLY(2000), MIDDLE(3000), LATE(4000)
 * HTP core provides some replacement functions for op package to use
 * for more information about optimization rules, please refer to HTP core
 * documentations
 */

/*
 * op parameter order definitions
 * need to be global in the package
 * one definition per op, and this is optional
 * syntax:
 * DEF_PACKAGE_PARAM_ORDER(OP,PARAM1,MANDATORY1,DEFAULT1,PARAM2,MANDATORY2,DEFAULT2...)
 * one or more parameters can be specified for each op
 * order of parameters listed determines the order of parameters passed into op
 * execution functions if an op does not have a parameter order definition,
 * parameter order passed into Qnn_addNode will be passed into op execution
 * functions if an op has a parameter order definition, any parameter passed
 * into Qnn_addNode with unlisted name will be abandoned if two or more op
 * packages with the same package name will be registered, they cannot list
 * conflicting parameter orders
 * PARAM refers to parameter name as a string literal
 * MANDATORY refers to whether this parameter is required to be provided at
 * Qnn_addNode DEFAULT is used when MANDATORY is false if provided as
 * Qnn_Param_t*, DEFAULT will be used for graph construction when this parameter
 * is not provided at Qnn_addNode if provided as nullptr, graph construction
 * will skip this parameter when this parameter is not provided at Qnn_addNode
 */

/* execute functions for ops */

template <typename TensorType>
GraphStatus causalmaskImpl(TensorType &out_0, const TensorType &in_0)

{
  /*
   * add code here
   * */
  /*
   * To have good performance and stability, it is required to avoid heap memory
   * allocation in this function. The heap memory allocation includes but not
   * limited to calling malloc, operator new, constructing STL container objects
   * like std::vector with default allocator, and adding items like calling
   * std::vector::push_back to STL container objects with default allocator.
   *
   * Please check in SDK documentation for more information.
   */
  out_0.set_dims(in_0);

  int old_dim = 0;

  // NHSD
  auto [b_in, h_in, w_in, d_in] = in_0.dims();

  // S > 1 => mask
  if (w_in > 1) {
    for (Idx b = 0; b < b_in; b++) {
      for (Idx h = 0; h < h_in; h++) {
        for (Idx w = 0; w < w_in; w++) {
          // CausalMask
          for (Idx d = 0; d < d_in; d++) {

            float in_value = in_0(b, h, w, d);

            if (d > w + old_dim)
              out_0(b, h, w, d) = in_value - MASK_INFINITY;
            else
              out_0(b, h, w, d) = in_value;
          }
        }
      }
    }
  } else {
    auto in_ptr = in_0.raw_data_const();
    auto out_ptr = out_0.raw_data();
    memcpy(out_ptr, in_ptr, b_in * h_in * w_in * d_in * 4);
  }

  return GraphStatus::Success;
}

__attribute__((unused)) static float causalmaskCostFunc(const Op *op) {
  /*
   * add code here
   * */

  float cost = 0.0; // add cost computation here
  return cost;
}

/* At the bottom of the op file, call END_PKG_OP_DEFINITION(<name>),
   where <name> is as BEGIN_PKG_OP_DEFINITION
 */
END_PKG_OP_DEFINITION(PKG_CausalMask);
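To make the masking rule above concrete, here is a small standalone sketch (not part of the commit; the 4x4 size, the dummy scores, and the `kMaskInfinity` constant are illustrative stand-ins). It applies the same `d > w + old_dim` test used in `causalmaskImpl` to one head of attention scores, pushing entries above the diagonal toward a large negative value so that a following softmax drives their weight to zero.

```cpp
// Minimal host-side sketch of the masking rule in causalmaskImpl (illustrative,
// not part of the commit): one head, seq_len == 4, old_dim == 0 (no cached prefix).
#include <cstdio>

int main() {
  constexpr int S = 4;                   // sequence length (the w and d axes above)
  constexpr float kMaskInfinity = 1e15f; // mirrors MASK_INFINITY
  float score[S][S];

  // Dummy attention scores of 1.0 everywhere.
  for (int w = 0; w < S; ++w)
    for (int d = 0; d < S; ++d)
      score[w][d] = 1.0f;

  // Same test as the op: a query at position w must not see keys at d > w + old_dim.
  const int old_dim = 0;
  for (int w = 0; w < S; ++w)
    for (int d = 0; d < S; ++d)
      if (d > w + old_dim)
        score[w][d] -= kMaskInfinity;

  // Prints 1.0 on and below the diagonal and roughly -1e15 above it, so a
  // following softmax assigns (near) zero weight to future positions.
  for (int w = 0; w < S; ++w) {
    for (int d = 0; d < S; ++d)
      std::printf("% .1e ", score[w][d]);
    std::printf("\n");
  }
  return 0;
}
```

For the single-token decode path (`w_in == 1`) the op skips the loop entirely and copies the input through `memcpy`; note that branch hard-codes 4-byte elements, which holds for float tensors.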
@@ -0,0 +1,164 @@
//==============================================================================
// Auto Generated Code for LLaMAPackage
//==============================================================================

#include "HTP/core/constraints.h"
#include "HTP/core/op_package_feature_support.h"
#include "HTP/core/op_register_ext.h"
#include "HTP/core/optimize.h"
#include "HTP/core/simple_reg.h"
#include "QnnOpPackage.h"

BEGIN_PKG_OP_DEFINITION(PKG_HeadMatmul);

static Qnn_Scalar_t sg_opDefaultTranspose_In0Scalar = {
  .dataType = Qnn_DataType_t::QNN_DATATYPE_BOOL_8, .bool8Value = false};
static Qnn_Param_t sg_opDefaultTranspose_In0 = {
  .paramType = QNN_PARAMTYPE_SCALAR,
  .scalarParam = sg_opDefaultTranspose_In0Scalar};
static Qnn_Scalar_t sg_opDefaultTranspose_In1Scalar = {
  .dataType = Qnn_DataType_t::QNN_DATATYPE_BOOL_8, .bool8Value = false};
static Qnn_Param_t sg_opDefaultTranspose_In1 = {
  .paramType = QNN_PARAMTYPE_SCALAR,
  .scalarParam = sg_opDefaultTranspose_In1Scalar};

// op execute function declarations
template <typename TensorType>
GraphStatus headmatmulImpl(TensorType &out_0, const TensorType &in_0,
                           const TensorType &in_1,
                           const QuantUint16Tensor &transpose_in0,
                           const QuantUint16Tensor &transpose_in1);

// forward declaration of sample cost function
static float headmatmulCostFunc(const Op *op);

/*
 * method 1 for defining op, using default cost value (i.e. GLACIAL) and default
 * flag (Flags::RESOURCE_HVX) syntax: DEF_PACKAGE_OP(F,OP) e.g.
 * DEF_PACKAGE_OP((headmatmulImpl<Tensor>), "HeadMatmul")
 */
DEF_PACKAGE_OP((headmatmulImpl<Tensor>), "HeadMatmul")

/*
 * method 2 for defining op with specified cost value (one of GLACIAL, SNAIL,
 * FAST, FREE) and provided flags syntax:
 * DEF_PACKAGE_OP_AND_COST_AND_FLAGS(F,OP,COST,...) can use zero or more flags,
 * FLAG options are IS_CONST, INHIBIT_CONST_PROP, RESOURCE_HVX, RESOURCE_HMX(not
 * supported in external op packages) e.g.
 * DEF_PACKAGE_OP_AND_COST_AND_FLAGS((headmatmulImpl<PlainFloatTensor>),
 * "HeadMatmul", SNAIL)
 */

/*
 * method 3 for defining op with cost function pointer and provided flags
 * cost function pointer type: typedef float (*cost_function) (const Op * op);
 * syntax: DEF_PACKAGE_OP_AND_COST_F_AND_FLAGS(F,OP,COST_F,...)
 * e.g. DEF_PACKAGE_OP_AND_COST_F_AND_FLAGS((headmatmulImpl<PlainFloatTensor>),
 * "HeadMatmul", headmatmulCostFunc, Flags::RESOURCE_HVX)
 */

/*
 * optimization definitions
 * need to be global in the package
 * one definition per optimization
 * syntax:
 * DEF_PACKAGE_OPTIMIZATION(PRIORITY,MATCHCODE,CONSTRAINTCODE,REPLACECODE)
 * PRIORITY predefined values include EARLY(2000), MIDDLE(3000), LATE(4000)
 * HTP core provides some replacement functions for op package to use
 * for more information about optimization rules, please refer to HTP core
 * documentations
 */

/*
 * op parameter order definitions
 * need to be global in the package
 * one definition per op, and this is optional
 * syntax:
 * DEF_PACKAGE_PARAM_ORDER(OP,PARAM1,MANDATORY1,DEFAULT1,PARAM2,MANDATORY2,DEFAULT2...)
 * one or more parameters can be specified for each op
 * order of parameters listed determines the order of parameters passed into op
 * execution functions if an op does not have a parameter order definition,
 * parameter order passed into Qnn_addNode will be passed into op execution
 * functions if an op has a parameter order definition, any parameter passed
 * into Qnn_addNode with unlisted name will be abandoned if two or more op
 * packages with the same package name will be registered, they cannot list
 * conflicting parameter orders
 * PARAM refers to parameter name as a string literal
 * MANDATORY refers to whether this parameter is required to be provided at
 * Qnn_addNode DEFAULT is used when MANDATORY is false if provided as
 * Qnn_Param_t*, DEFAULT will be used for graph construction when this parameter
 * is not provided at Qnn_addNode if provided as nullptr, graph construction
 * will skip this parameter when this parameter is not provided at Qnn_addNode
 */
DEF_PACKAGE_PARAM_ORDER("HeadMatmul", "transpose_in0", false,
                        &sg_opDefaultTranspose_In0, "transpose_in1", false,
                        &sg_opDefaultTranspose_In1)

/* execute functions for ops */

template <typename TensorType>
GraphStatus headmatmulImpl(TensorType &out_0, const TensorType &in_0,
                           const TensorType &in_1,
                           const QuantUint16Tensor &transpose_in0,
                           const QuantUint16Tensor &transpose_in1)

{
  /*
   * add code here
   * */
  /*
   * To have good performance and stability, it is required to avoid heap memory
   * allocation in this function. The heap memory allocation includes but not
   * limited to calling malloc, operator new, constructing STL container objects
   * like std::vector with default allocator, and adding items like calling
   * std::vector::push_back to STL container objects with default allocator.
   *
   * Please check in SDK documentation for more information.
   */

  auto transpose_in0_ = transpose_in0(0, 0, 0, 0);
  auto transpose_in1_ = transpose_in1(0, 0, 0, 0);

  auto [b_in, h_in, w_in, d_in] = in_0.dims();
  auto [b_in2, h_in2, w_in2, d_in2] = in_1.dims();

  if (transpose_in0_ && transpose_in1_) {

    // Q KT head matmul
    const size_t dims[] = {b_in, w_in, h_in, h_in};
    out_0.set_dims(dims);
    debuglog("HeadMatmul execute... dims=(%zdx%zdx%zdx%zd)", out_0.dim(0),
             out_0.dim(1), out_0.dim(2), out_0.dim(3));

  } else if (transpose_in0_) {

  } else if (transpose_in1_) {

    // QKT V head matmul
    const size_t dims[] = {b_in, w_in, h_in, d_in2};
    out_0.set_dims(dims);
    debuglog("HeadMatmul execute... dims=(%zdx%zdx%zdx%zd)", out_0.dim(0),
             out_0.dim(1), out_0.dim(2), out_0.dim(3));

    // Todo out matrix needs transpose, we directly calculate the final
    // dimensions.

  } else {
  }

  return GraphStatus::Success;
}

__attribute__((unused)) static float headmatmulCostFunc(const Op *op) {
  /*
   * add code here
   * */

  float cost = 0.0; // add cost computation here
  return cost;
}

/* At the bottom of the op file, call END_PKG_OP_DEFINITION(<name>),
   where <name> is as BEGIN_PKG_OP_DEFINITION
 */
END_PKG_OP_DEFINITION(PKG_HeadMatmul);
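The shape bookkeeping in `headmatmulImpl` is driven entirely by the two boolean parameters registered through `DEF_PACKAGE_PARAM_ORDER`. The standalone sketch below (not part of the commit; the helper name `headMatmulOutDims` and the example shapes are made up for illustration) mirrors the two `set_dims` calls above: with both transpose flags set the output is (b, w, h, h), the "Q KT" score tensor, and with only `transpose_in1` set it is (b, w, h, d2), the "QKT V" context tensor.

```cpp
// Standalone sketch (illustrative, not part of the commit) of the output-shape
// selection mirrored from the set_dims calls in headmatmulImpl above.
#include <array>
#include <cstddef>
#include <cstdio>

using Dims = std::array<std::size_t, 4>;  // (b, h, w, d) as returned by in_0.dims()

// transpose_in0/transpose_in1 correspond to the two optional op parameters
// registered via DEF_PACKAGE_PARAM_ORDER; this helper name is hypothetical.
Dims headMatmulOutDims(const Dims &in0, const Dims &in1,
                       bool transpose_in0, bool transpose_in1) {
  const std::size_t b = in0[0], h = in0[1], w = in0[2];
  const std::size_t d2 = in1[3];
  if (transpose_in0 && transpose_in1)
    return {b, w, h, h};   // "Q KT head matmul": per-head attention scores
  if (transpose_in1)
    return {b, w, h, d2};  // "QKT V head matmul": per-head context vectors
  return in0;              // remaining flag combinations are left empty in the op
}

int main() {
  // Hypothetical shapes: batch 1, 32 heads, 16 tokens, head dim 128.
  Dims q{1, 32, 16, 128};
  Dims k{1, 32, 16, 128};
  Dims scores =
    headMatmulOutDims(q, k, /*transpose_in0=*/true, /*transpose_in1=*/true);
  std::printf("scores dims: %zu x %zu x %zu x %zu\n", scores[0], scores[1],
              scores[2], scores[3]);
  return 0;
}
```

Because both parameters default to false via `sg_opDefaultTranspose_In0`/`sg_opDefaultTranspose_In1`, a graph that omits them lands in the empty fall-through branch of the committed op.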