Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding LoRA support + changing command format for main.cpp #6

Merged
merged 2 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion pybind/AppBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
//==============================================================================

#include "AppBuilder.h"
#include "Lora.hpp"
#include <ostream>

#define APPBUILDER_VERSION "2.28.0"
#define APPBUILDER_VERSION "2.28.2"

ShareMemory::ShareMemory(const std::string& share_memory_name, const size_t share_memory_size) {
m_share_memory_name = share_memory_name;
Expand All @@ -32,6 +34,16 @@ QNNContext::QNNContext(const std::string& model_name, const std::string& proc_na
g_LibAppBuilder.ModelInitialize(model_name, proc_name, model_path, backend_lib_path, system_lib_path);
}

// Construct a QNNContext that loads `model_path` and registers the given LoRA
// adapters. The adapter list is copied into the context so it stays alive for
// the lifetime of the model.
QNNContext::QNNContext(const std::string& model_name,
                       const std::string& model_path, const std::string& backend_lib_path,
                       const std::string& system_lib_path, const std::vector<LoraAdapter>& lora_adapters)
    : m_model_name(model_name),
      m_lora_adapters(lora_adapters) {
    g_LibAppBuilder.ModelInitialize(model_name, model_path, backend_lib_path, system_lib_path, m_lora_adapters);
}

QNNContext::~QNNContext() {
if (m_proc_name.empty())
g_LibAppBuilder.ModelDestroy(m_model_name);
Expand Down Expand Up @@ -100,9 +112,12 @@ PYBIND11_MODULE(appbuilder, m) {

py::class_<QNNContext>(m, "QNNContext")
.def(py::init<const std::string&, const std::string&, const std::string&, const std::string&>())
.def(py::init<const std::string&, const std::string&, const std::string&, const std::string&, const std::vector<LoraAdapter>&>())
.def(py::init<const std::string&, const std::string&, const std::string&, const std::string&, const std::string&>())
.def("Inference", py::overload_cast<const std::vector<py::array_t<float>>&, const std::string&>(&QNNContext::Inference))
.def("Inference", py::overload_cast<const ShareMemory&, const std::vector<py::array_t<float>>&, const std::string&>(&QNNContext::Inference));

py::class_<LoraAdapter>(m, "LoraAdapter")
.def(py::init<const std::string &, const std::vector<std::string> &>());
}

12 changes: 10 additions & 2 deletions pybind/AppBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <pybind11/stl.h>

#include "LibAppBuilder.hpp"
#include "Lora.hpp"

using namespace std;
namespace py = pybind11;
Expand Down Expand Up @@ -159,12 +160,19 @@ class QNNContext {
public:
std::string m_model_name;
std::string m_proc_name;
std::vector<LoraAdapter> m_lora_adapters;

QNNContext(const std::string& model_name,
const std::string& model_path, const std::string& backend_lib_path, const std::string& system_lib_path);
const std::string& model_path, const std::string& backend_lib_path,
const std::string& system_lib_path);

QNNContext(const std::string& model_name,
const std::string& model_path, const std::string& backend_lib_path,
const std::string& system_lib_path, const std::vector<LoraAdapter>& lora_adapters);

QNNContext(const std::string& model_name, const std::string& proc_name,
const std::string& model_path, const std::string& backend_lib_path, const std::string& system_lib_path);
const std::string& model_path, const std::string& backend_lib_path,
const std::string& system_lib_path);

std::vector<py::array_t<float>> Inference(const std::vector<py::array_t<float>>& input, const std::string& perf_profile = "default");
std::vector<py::array_t<float>> Inference(const ShareMemory& share_memory, const std::vector<py::array_t<float>>& input, const std::string& perf_profile = "default");
Expand Down
60 changes: 57 additions & 3 deletions script/qai_appbuilder/qnncontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,55 @@ def Config(qnn_lib_path: str = "None",
ProfilingLevel.SetProfilingLevel(profiling_level)


class QNNLoraContext:
    """High-level Python wrapper for an AppBuilder model with LoRA adapters."""
    def __init__(self,
                 model_name: str = "None",
                 model_path: str = "None",
                 backend_lib_path: str = "None",
                 system_lib_path: str = "None",
                 lora_adapters = None,
                 runtime : str = Runtime.HTP
                ) -> None:
        """Load a QNN model from `model_path` and apply the given LoRA adapters.

        Args:
            model_name (str): name used to register the model.
            model_path (str): path to the QNN model binary.
            backend_lib_path (str): backend library path; "None" falls back to
                the globally configured path.
            system_lib_path (str): system library path; "None" falls back to
                the globally configured path.
            lora_adapters (list[LoraAdapter] | None): adapters to apply;
                None or an empty list means no adapters.
            runtime (str): target runtime (not used directly here).

        Raises:
            ValueError: if `model_path` is missing or does not exist.
        """
        self.model_path = model_path
        self.lora_adapters = lora_adapters

        # Validate the model path first so callers get a clear error before
        # any adapter processing happens. Note the default is the string
        # "None", so check for that as well as a real None.
        if self.model_path is None or self.model_path == "None":
            raise ValueError("model_path must be specified!")

        if not os.path.exists(self.model_path):
            raise ValueError(f"Model path does not exist: {self.model_path}")

        # Collect the underlying C++ adapter objects; a missing list simply
        # means "no adapters" (the original code crashed on the None default).
        m_lora_adapters = []
        if lora_adapters is not None:
            for adapter in lora_adapters:
                m_lora_adapters.append(adapter.m_adapter)

        if (backend_lib_path == "None"):
            backend_lib_path = g_backend_lib_path
        if (system_lib_path == "None"):
            system_lib_path = g_system_lib_path

        self.m_context = appbuilder.QNNContext(model_name, model_path,
                                               backend_lib_path, system_lib_path,
                                               m_lora_adapters)

    #@timer
    def Inference(self, input, perf_profile = PerfProfile.DEFAULT):
        """Run inference on the wrapped model; returns the output arrays."""
        return self.m_context.Inference(input, perf_profile)

    #@timer
    def __del__(self):
        # Release the C++ context; reset the attribute (the original assigned
        # a dead local `m_context` here) so a second __del__ call is a no-op.
        if hasattr(self, "m_context") and self.m_context is not None:
            del(self.m_context)
            self.m_context = None


class QNNContext:
    """High-level Python wrapper for an AppBuilder model."""
def __init__(self,
Expand Down Expand Up @@ -144,7 +193,7 @@ def __init__(self,
if (system_lib_path == "None"):
system_lib_path = g_system_lib_path

self.m_context = appbuilder.QNNContext(model_name, model_path, backend_lib_path, system_lib_path)
self.m_context = appbuilder.QNNContext(model_name, model_path, backend_lib_path, system_lib_path, [])

#@timer
def Inference(self, input, perf_profile = PerfProfile.DEFAULT):
Expand Down Expand Up @@ -199,7 +248,6 @@ def __del__(self):
del(self.m_context)
m_context = None


class QNNShareMemory:
    """High-level Python wrapper for an AppBuilder model."""
def __init__(self,
Expand All @@ -220,4 +268,10 @@ def __del__(self):
if hasattr(self, "m_memory") and self.m_memory is not None:
del(self.m_memory)
m_memory = None


class LoraAdapter: # this will just hold data
    """Thin Python-side holder for one LoRA adapter.

    Wraps the pybind `appbuilder.LoraAdapter` so callers can build adapter
    lists without touching the C++ binding directly.
    """
    # Underlying appbuilder.LoraAdapter instance; set in __init__.
    m_adapter = None

    def __init__(self, graph_name, lora_file_paths):
        # graph_name: name of the graph the adapter applies to.
        # lora_file_paths: list of LoRA binary file paths for that graph.
        self.m_adapter = appbuilder.LoraAdapter(graph_name, lora_file_paths) # cpp object

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from setuptools import Extension, setup, find_packages
from setuptools.command.build_ext import build_ext

VERSION = "2.28.0"
VERSION = "2.28.2"
CONFIG = "Release" # Release, RelWithDebInfo
package_name = "qai_appbuilder"

Expand Down
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ set(APP_SOURCES "QnnSampleApp.cpp"
"Utils/IOTensor.cpp"
"Utils/QnnSampleAppUtils.cpp"
"WrapperUtils/QnnWrapperUtils.cpp"
"LibAppBuilder.cpp")
"LibAppBuilder.cpp"
"Lora.cpp")

if (WIN32)
set(APP_SOURCES_ARCH "PAL/src/windows/Common.cpp"
Expand Down
26 changes: 20 additions & 6 deletions src/LibAppBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "PAL/DynamicLoading.hpp"
#include "PAL/GetOpt.hpp"
#include "QnnSampleApp.hpp"
#include "Lora.hpp"
#include "QnnSampleAppUtils.hpp"
#include "LibAppBuilder.hpp"
#ifdef _WIN32
Expand Down Expand Up @@ -50,7 +51,8 @@ namespace libappbuilder {
std::unique_ptr<sample_app::QnnSampleApp> initQnnSampleApp(std::string cachedBinaryPath,
std::string backEndPath,
std::string systemLibraryPath,
bool loadFromCachedBinary) {
bool loadFromCachedBinary,
const std::vector<LoraAdapter>& lora_adapters) {
// Just keep blank for below paths.
std::string modelPath;
std::string cachedBinaryPath2;
Expand Down Expand Up @@ -100,7 +102,7 @@ std::unique_ptr<sample_app::QnnSampleApp> initQnnSampleApp(std::string cachedBin
sg_qnnInterface = qnnFunctionPointers.qnnInterface;
std::unique_ptr<sample_app::QnnSampleApp> app(new sample_app::QnnSampleApp(qnnFunctionPointers, "null", opPackagePaths, sg_backendHandle, "null",
debug, parsedOutputDataType, parsedInputDataType, sg_parsedProfilingLevel,
dumpOutputs, cachedBinaryPath2, saveBinaryName));
dumpOutputs, cachedBinaryPath2, saveBinaryName, lora_adapters));
return app;
}

Expand Down Expand Up @@ -285,9 +287,9 @@ bool DeleteShareMemory(std::string share_memory_name) {
}

bool ModelInitializeEx(const std::string& model_name, const std::string& proc_name, const std::string& model_path,
const std::string& backend_lib_path, const std::string& system_lib_path) {
const std::string& backend_lib_path, const std::string& system_lib_path,
const std::vector<LoraAdapter>& lora_adapters) {
bool result = false;

QNN_INF("LibAppBuilder::ModelInitialize: %s \n", model_name.c_str());

#ifdef _WIN32
Expand Down Expand Up @@ -322,7 +324,7 @@ bool ModelInitializeEx(const std::string& model_name, const std::string& proc_na
}

{
std::unique_ptr<sample_app::QnnSampleApp> app = libappbuilder::initQnnSampleApp(cachedBinaryPath, backEndPath, systemLibraryPath, loadFromCachedBinary);
std::unique_ptr<sample_app::QnnSampleApp> app = libappbuilder::initQnnSampleApp(cachedBinaryPath, backEndPath, systemLibraryPath, loadFromCachedBinary, lora_adapters);

if (nullptr == app) {
return false;
Expand Down Expand Up @@ -390,6 +392,12 @@ bool ModelInitializeEx(const std::string& model_name, const std::string& proc_na
}
}

// apply lora Adapter on graph
if (app->binaryUpdates() &&
sample_app::StatusCode::SUCCESS != app->contextApplyBinarySection(QNN_CONTEXT_SECTION_UPDATABLE)) {
return app->reportError("Binary update/execution failure");
}

timerHelper.Print("model_initialize");

sg_model_map.insert(std::make_pair(model_name, std::move(app)));
Expand Down Expand Up @@ -510,7 +518,13 @@ bool LibAppBuilder::ModelInitialize(const std::string& model_name, const std::st

// Initialize a model without a dedicated process and without LoRA adapters.
// Delegates to ModelInitializeEx with an empty proc name and adapter list.
bool LibAppBuilder::ModelInitialize(const std::string& model_name, const std::string& model_path,
                                    const std::string& backend_lib_path, const std::string& system_lib_path) {
    return ModelInitializeEx(model_name, "", model_path, backend_lib_path, system_lib_path, {});
}

// Initialize a model without a dedicated process, applying the given LoRA
// adapters. Pure forwarder to ModelInitializeEx (empty proc name).
bool LibAppBuilder::ModelInitialize(const std::string& model_name, const std::string& model_path,
    const std::string& backend_lib_path, const std::string& system_lib_path,const std::vector<LoraAdapter>& lora_adapters) {
    return ModelInitializeEx(model_name, "", model_path, backend_lib_path, system_lib_path, lora_adapters);
}

bool LibAppBuilder::ModelInference(std::string model_name, std::string proc_name, std::string share_memory_name,
Expand Down
4 changes: 4 additions & 0 deletions src/LibAppBuilder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <string>
#include <vector>
#include <chrono>
#include "Lora.hpp"

#ifdef _WIN32
#ifdef DLL_EXPORTS
Expand Down Expand Up @@ -54,6 +55,9 @@ class LIBAPPBUILDER_API LibAppBuilder
bool ModelInitialize(const std::string& model_name, const std::string& proc_name, const std::string& model_path,
const std::string& backend_lib_path, const std::string& system_lib_path);

bool ModelInitialize(const std::string& model_name, const std::string& model_path,
const std::string& backend_lib_path, const std::string& system_lib_path, const std::vector<LoraAdapter>& lora_adapters);

bool ModelInference(std::string model_name, std::vector<uint8_t*>& inputBuffers,
std::vector<uint8_t*>& outputBuffers, std::vector<size_t>& outputSize,
std::string& perfProfile);
Expand Down
21 changes: 21 additions & 0 deletions src/Lora.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//==============================================================================
//
// Copyright (c) 2025, Qualcomm Innovation Center, Inc. All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause
//
//==============================================================================

#pragma once
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add license header to new files.

//==============================================================================
//
// Copyright (c) 2025, Qualcomm Innovation Center, Inc. All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause
//
//==============================================================================


#include <string>
#include <vector>
#include "Lora.hpp"

// Bind a set of LoRA binary files to the graph they update; both fields are
// copied from the arguments.
LoraAdapter::LoraAdapter(const std::string &graph_name, const std::vector<std::string> &bin_paths)
    : m_graph_name(graph_name),
      m_bin_paths(bin_paths) {
}

// Trivial destructor; members clean themselves up.
LoraAdapter::~LoraAdapter() = default;
33 changes: 33 additions & 0 deletions src/Lora.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//==============================================================================
//
// Copyright (c) 2025, Qualcomm Innovation Center, Inc. All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause
//
//==============================================================================

#pragma once

#include <string>
#include <vector>

#ifdef _WIN32
#ifdef DLL_EXPORTS
#define LIBAPPBUILDER_API __declspec(dllexport)
#else
#define LIBAPPBUILDER_API __declspec(dllimport)
#endif
#else // _WIN32
#define LIBAPPBUILDER_API
#endif


// Plain data holder describing one LoRA adapter: the graph it applies to and
// the binary files carrying its weight updates. Passed by const reference
// through ModelInitialize and stored by the model context.
class LIBAPPBUILDER_API LoraAdapter{
public:
    std::string m_graph_name;              // name of the graph the adapter targets
    std::vector<std::string> m_bin_paths;  // paths to the LoRA binary files

    // Copies both fields from the arguments (defined in Lora.cpp).
    LoraAdapter(const std::string &graph_name, const std::vector<std::string> &bin_paths);

    // NOTE(review): a user-declared destructor suppresses the implicit move
    // operations; if it stays empty, consider removing it (Rule of Zero).
    ~LoraAdapter();
};
Loading