Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding LoRA support + changing command format for main.cpp #6

Merged
merged 2 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion pybind/AppBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
//==============================================================================

#include "AppBuilder.h"
#include "Lora.hpp"
#include <ostream>

#define APPBUILDER_VERSION "2.28.0"
#define APPBUILDER_VERSION "2.28.2"

ShareMemory::ShareMemory(const std::string& share_memory_name, const size_t share_memory_size) {
m_share_memory_name = share_memory_name;
Expand All @@ -32,6 +34,16 @@ QNNContext::QNNContext(const std::string& model_name, const std::string& proc_na
g_LibAppBuilder.ModelInitialize(model_name, proc_name, model_path, backend_lib_path, system_lib_path);
}

// Construct a QNNContext that loads `model_path` and registers the given LoRA
// adapters. The adapter list is copied into the context so it stays alive for
// the lifetime of the model.
QNNContext::QNNContext(const std::string& model_name,
                       const std::string& model_path, const std::string& backend_lib_path,
                       const std::string& system_lib_path, const std::vector<LoraAdapter>& lora_adapters)
    : m_model_name(model_name),
      m_lora_adapters(lora_adapters) {
    g_LibAppBuilder.ModelInitialize(model_name, model_path, backend_lib_path, system_lib_path, m_lora_adapters);
}

QNNContext::~QNNContext() {
if (m_proc_name.empty())
g_LibAppBuilder.ModelDestroy(m_model_name);
Expand Down Expand Up @@ -100,9 +112,12 @@ PYBIND11_MODULE(appbuilder, m) {

py::class_<QNNContext>(m, "QNNContext")
.def(py::init<const std::string&, const std::string&, const std::string&, const std::string&>())
.def(py::init<const std::string&, const std::string&, const std::string&, const std::string&, const std::vector<LoraAdapter>&>())
.def(py::init<const std::string&, const std::string&, const std::string&, const std::string&, const std::string&>())
.def("Inference", py::overload_cast<const std::vector<py::array_t<float>>&, const std::string&>(&QNNContext::Inference))
.def("Inference", py::overload_cast<const ShareMemory&, const std::vector<py::array_t<float>>&, const std::string&>(&QNNContext::Inference));

py::class_<LoraAdapter>(m, "LoraAdapter")
.def(py::init<const std::string &, const std::vector<std::string> &>());
}

12 changes: 10 additions & 2 deletions pybind/AppBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <pybind11/stl.h>

#include "LibAppBuilder.hpp"
#include "Lora.hpp"

using namespace std;
namespace py = pybind11;
Expand Down Expand Up @@ -159,12 +160,19 @@ class QNNContext {
public:
std::string m_model_name;
std::string m_proc_name;
std::vector<LoraAdapter> m_lora_adapters;

QNNContext(const std::string& model_name,
const std::string& model_path, const std::string& backend_lib_path, const std::string& system_lib_path);
const std::string& model_path, const std::string& backend_lib_path,
const std::string& system_lib_path);

QNNContext(const std::string& model_name,
const std::string& model_path, const std::string& backend_lib_path,
const std::string& system_lib_path, const std::vector<LoraAdapter>& lora_adapters);

QNNContext(const std::string& model_name, const std::string& proc_name,
const std::string& model_path, const std::string& backend_lib_path, const std::string& system_lib_path);
const std::string& model_path, const std::string& backend_lib_path,
const std::string& system_lib_path);

std::vector<py::array_t<float>> Inference(const std::vector<py::array_t<float>>& input, const std::string& perf_profile = "default");
std::vector<py::array_t<float>> Inference(const ShareMemory& share_memory, const std::vector<py::array_t<float>>& input, const std::string& perf_profile = "default");
Expand Down
60 changes: 57 additions & 3 deletions script/qai_appbuilder/qnncontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,55 @@ def Config(qnn_lib_path: str = "None",
ProfilingLevel.SetProfilingLevel(profiling_level)


class QNNLoraContext:
    """High-level Python wrapper for an AppBuilder model with LoRA adapters."""
    def __init__(self,
                 model_name: str = "None",
                 model_path: str = "None",
                 backend_lib_path: str = "None",
                 system_lib_path: str = "None",
                 lora_adapters = None,
                 runtime : str = Runtime.HTP
                ) -> None:
        """Load a QNN model from `model_path` and apply the given LoRA adapters.

        Args:
            model_name (str): name used to register the model.
            model_path (str): path to the QNN model binary.
            backend_lib_path (str): backend library path; "None" falls back to
                the globally configured path.
            system_lib_path (str): system library path; "None" falls back to
                the globally configured path.
            lora_adapters (list[LoraAdapter] | None): adapters to apply;
                None or an empty list means no adapters.
            runtime (str): target runtime (not used directly here).

        Raises:
            ValueError: if `model_path` is missing or does not exist.
        """
        self.model_path = model_path
        self.lora_adapters = lora_adapters

        # Validate the model path first so callers get a clear error before
        # any adapter processing happens. Note the default is the string
        # "None", so check for that as well as a real None.
        if self.model_path is None or self.model_path == "None":
            raise ValueError("model_path must be specified!")

        if not os.path.exists(self.model_path):
            raise ValueError(f"Model path does not exist: {self.model_path}")

        # Collect the underlying C++ adapter objects; a missing list simply
        # means "no adapters" (the original code crashed on the None default).
        m_lora_adapters = []
        if lora_adapters is not None:
            for adapter in lora_adapters:
                m_lora_adapters.append(adapter.m_adapter)

        if (backend_lib_path == "None"):
            backend_lib_path = g_backend_lib_path
        if (system_lib_path == "None"):
            system_lib_path = g_system_lib_path

        self.m_context = appbuilder.QNNContext(model_name, model_path,
                                               backend_lib_path, system_lib_path,
                                               m_lora_adapters)

    #@timer
    def Inference(self, input, perf_profile = PerfProfile.DEFAULT):
        """Run inference on the wrapped model; returns the output arrays."""
        return self.m_context.Inference(input, perf_profile)

    #@timer
    def __del__(self):
        # Release the C++ context; reset the attribute (the original assigned
        # a dead local `m_context` here) so a second __del__ call is a no-op.
        if hasattr(self, "m_context") and self.m_context is not None:
            del(self.m_context)
            self.m_context = None


class QNNContext:
    """High-level Python wrapper for an AppBuilder model."""
def __init__(self,
Expand Down Expand Up @@ -144,7 +193,7 @@ def __init__(self,
if (system_lib_path == "None"):
system_lib_path = g_system_lib_path

self.m_context = appbuilder.QNNContext(model_name, model_path, backend_lib_path, system_lib_path)
self.m_context = appbuilder.QNNContext(model_name, model_path, backend_lib_path, system_lib_path, [])

#@timer
def Inference(self, input, perf_profile = PerfProfile.DEFAULT):
Expand Down Expand Up @@ -199,7 +248,6 @@ def __del__(self):
del(self.m_context)
m_context = None


class QNNShareMemory:
    """High-level Python wrapper for an AppBuilder model."""
def __init__(self,
Expand All @@ -220,4 +268,10 @@ def __del__(self):
if hasattr(self, "m_memory") and self.m_memory is not None:
del(self.m_memory)
m_memory = None


class LoraAdapter: # this will just hold data
    """Thin Python-side holder for one LoRA adapter.

    Wraps the pybind `appbuilder.LoraAdapter` so callers can build adapter
    lists without touching the C++ binding directly.
    """
    # Underlying appbuilder.LoraAdapter instance; set in __init__.
    m_adapter = None

    def __init__(self, graph_name, lora_file_paths):
        # graph_name: name of the graph the adapter applies to.
        # lora_file_paths: list of LoRA binary file paths for that graph.
        self.m_adapter = appbuilder.LoraAdapter(graph_name, lora_file_paths) # cpp object

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from setuptools import Extension, setup, find_packages
from setuptools.command.build_ext import build_ext

VERSION = "2.28.0"
VERSION = "2.28.2"
CONFIG = "Release" # Release, RelWithDebInfo
package_name = "qai_appbuilder"

Expand Down
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ set(APP_SOURCES "QnnSampleApp.cpp"
"Utils/IOTensor.cpp"
"Utils/QnnSampleAppUtils.cpp"
"WrapperUtils/QnnWrapperUtils.cpp"
"LibAppBuilder.cpp")
"LibAppBuilder.cpp"
"Lora.cpp")

if (WIN32)
set(APP_SOURCES_ARCH "PAL/src/windows/Common.cpp"
Expand Down
26 changes: 20 additions & 6 deletions src/LibAppBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "PAL/DynamicLoading.hpp"
#include "PAL/GetOpt.hpp"
#include "QnnSampleApp.hpp"
#include "Lora.hpp"
#include "QnnSampleAppUtils.hpp"
#include "LibAppBuilder.hpp"
#ifdef _WIN32
Expand Down Expand Up @@ -50,7 +51,8 @@ namespace libappbuilder {
std::unique_ptr<sample_app::QnnSampleApp> initQnnSampleApp(std::string cachedBinaryPath,
std::string backEndPath,
std::string systemLibraryPath,
bool loadFromCachedBinary) {
bool loadFromCachedBinary,
const std::vector<LoraAdapter>& lora_adapters) {
// Just keep blank for below paths.
std::string modelPath;
std::string cachedBinaryPath2;
Expand Down Expand Up @@ -100,7 +102,7 @@ std::unique_ptr<sample_app::QnnSampleApp> initQnnSampleApp(std::string cachedBin
sg_qnnInterface = qnnFunctionPointers.qnnInterface;
std::unique_ptr<sample_app::QnnSampleApp> app(new sample_app::QnnSampleApp(qnnFunctionPointers, "null", opPackagePaths, sg_backendHandle, "null",
debug, parsedOutputDataType, parsedInputDataType, sg_parsedProfilingLevel,
dumpOutputs, cachedBinaryPath2, saveBinaryName));
dumpOutputs, cachedBinaryPath2, saveBinaryName, lora_adapters));
return app;
}

Expand Down Expand Up @@ -285,9 +287,9 @@ bool DeleteShareMemory(std::string share_memory_name) {
}

bool ModelInitializeEx(const std::string& model_name, const std::string& proc_name, const std::string& model_path,
const std::string& backend_lib_path, const std::string& system_lib_path) {
const std::string& backend_lib_path, const std::string& system_lib_path,
const std::vector<LoraAdapter>& lora_adapters) {
bool result = false;

QNN_INF("LibAppBuilder::ModelInitialize: %s \n", model_name.c_str());

#ifdef _WIN32
Expand Down Expand Up @@ -322,7 +324,7 @@ bool ModelInitializeEx(const std::string& model_name, const std::string& proc_na
}

{
std::unique_ptr<sample_app::QnnSampleApp> app = libappbuilder::initQnnSampleApp(cachedBinaryPath, backEndPath, systemLibraryPath, loadFromCachedBinary);
std::unique_ptr<sample_app::QnnSampleApp> app = libappbuilder::initQnnSampleApp(cachedBinaryPath, backEndPath, systemLibraryPath, loadFromCachedBinary, lora_adapters);

if (nullptr == app) {
return false;
Expand Down Expand Up @@ -390,6 +392,12 @@ bool ModelInitializeEx(const std::string& model_name, const std::string& proc_na
}
}

// apply lora Adapter on graph
if (app->binaryUpdates() &&
sample_app::StatusCode::SUCCESS != app->contextApplyBinarySection(QNN_CONTEXT_SECTION_UPDATABLE)) {
return app->reportError("Binary update/execution failure");
}

timerHelper.Print("model_initialize");

sg_model_map.insert(std::make_pair(model_name, std::move(app)));
Expand Down Expand Up @@ -510,7 +518,13 @@ bool LibAppBuilder::ModelInitialize(const std::string& model_name, const std::st

// Initialize a model without a dedicated process and without LoRA adapters.
// Delegates to ModelInitializeEx with an empty proc name and adapter list.
bool LibAppBuilder::ModelInitialize(const std::string& model_name, const std::string& model_path,
                                    const std::string& backend_lib_path, const std::string& system_lib_path) {
    return ModelInitializeEx(model_name, "", model_path, backend_lib_path, system_lib_path, {});
}

// Initialize a model without a dedicated process, applying the given LoRA
// adapters. Pure forwarder to ModelInitializeEx (empty proc name).
bool LibAppBuilder::ModelInitialize(const std::string& model_name, const std::string& model_path,
    const std::string& backend_lib_path, const std::string& system_lib_path,const std::vector<LoraAdapter>& lora_adapters) {
    return ModelInitializeEx(model_name, "", model_path, backend_lib_path, system_lib_path, lora_adapters);
}

bool LibAppBuilder::ModelInference(std::string model_name, std::string proc_name, std::string share_memory_name,
Expand Down
4 changes: 4 additions & 0 deletions src/LibAppBuilder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <string>
#include <vector>
#include <chrono>
#include "Lora.hpp"

#ifdef _WIN32
#ifdef DLL_EXPORTS
Expand Down Expand Up @@ -54,6 +55,9 @@ class LIBAPPBUILDER_API LibAppBuilder
bool ModelInitialize(const std::string& model_name, const std::string& proc_name, const std::string& model_path,
const std::string& backend_lib_path, const std::string& system_lib_path);

bool ModelInitialize(const std::string& model_name, const std::string& model_path,
const std::string& backend_lib_path, const std::string& system_lib_path, const std::vector<LoraAdapter>& lora_adapters);

bool ModelInference(std::string model_name, std::vector<uint8_t*>& inputBuffers,
std::vector<uint8_t*>& outputBuffers, std::vector<size_t>& outputSize,
std::string& perfProfile);
Expand Down
21 changes: 21 additions & 0 deletions src/Lora.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//==============================================================================
//
// Copyright (c) 2025, Qualcomm Innovation Center, Inc. All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause
//
//==============================================================================

#pragma once
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add license header to new files.

//==============================================================================
//
// Copyright (c) 2025, Qualcomm Innovation Center, Inc. All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause
//
//==============================================================================


#include <string>
#include <vector>
#include "Lora.hpp"

// Bind a set of LoRA binary files to the graph they update; both fields are
// copied from the arguments.
LoraAdapter::LoraAdapter(const std::string &graph_name, const std::vector<std::string> &bin_paths)
    : m_graph_name(graph_name),
      m_bin_paths(bin_paths) {
}

// Trivial destructor; members clean themselves up.
LoraAdapter::~LoraAdapter() = default;
33 changes: 33 additions & 0 deletions src/Lora.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//==============================================================================
//
// Copyright (c) 2025, Qualcomm Innovation Center, Inc. All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause
//
//==============================================================================

#pragma once

#include <string>
#include <vector>

#ifdef _WIN32
#ifdef DLL_EXPORTS
#define LIBAPPBUILDER_API __declspec(dllexport)
#else
#define LIBAPPBUILDER_API __declspec(dllimport)
#endif
#else // _WIN32
#define LIBAPPBUILDER_API
#endif


// Plain data holder describing one LoRA adapter: the graph it applies to and
// the binary files carrying its weight updates. Passed by const reference
// through ModelInitialize and stored by the model context.
class LIBAPPBUILDER_API LoraAdapter{
public:
    std::string m_graph_name;              // name of the graph the adapter targets
    std::vector<std::string> m_bin_paths;  // paths to the LoRA binary files

    // Copies both fields from the arguments (defined in Lora.cpp).
    LoraAdapter(const std::string &graph_name, const std::vector<std::string> &bin_paths);

    // NOTE(review): a user-declared destructor suppresses the implicit move
    // operations; if it stays empty, consider removing it (Rule of Zero).
    ~LoraAdapter();
};
Loading