llama : remove redundant keywords (struct, enum)
ggml-ci
ggerganov committed Mar 11, 2025
1 parent bcee6dd commit a170669
Showing 9 changed files with 609 additions and 611 deletions.
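
Background on the commit message: in C++ the struct and enum keywords in declarations such as "struct ggml_tensor *" are elaborated type specifiers and become optional once the type has been declared, whereas C requires the keyword unless a typedef is in scope. That is why the keywords can be dropped throughout the C++ sources below, while the public C header include/llama.h keeps them (only a comment changes in that file). A minimal sketch of the language difference, reusing the ggml_tensor name from this diff:

// C++: once the type is declared, the class-key is optional.
struct ggml_tensor;                     // forward declaration
ggml_tensor        * t1 = nullptr;      // fine in C++
struct ggml_tensor * t2 = nullptr;      // also fine, but the keyword is redundant

// C: the keyword is required unless a typedef exists.
// struct ggml_tensor * t3;                   /* valid C              */
// ggml_tensor        * t4;                   /* error in plain C     */
// typedef struct ggml_tensor ggml_tensor;    /* this makes t4 valid  */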
include/llama.h (2 changes: 1 addition & 1 deletion)
@@ -471,7 +471,7 @@ extern "C" {

LLAMA_API const struct llama_model * llama_get_model (const struct llama_context * ctx); // TODO: remove const?
LLAMA_API struct llama_kv_cache * llama_get_kv_self ( struct llama_context * ctx);
-LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
+LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx); // TODO: rename to llama_get_pooling_type

LLAMA_API const struct llama_vocab * llama_model_get_vocab(const struct llama_model * model);
LLAMA_API enum llama_rope_type llama_model_rope_type(const struct llama_model * model);
src/llama-adapter.cpp (29 changes: 14 additions & 15 deletions)
@@ -4,22 +4,21 @@
#include "llama-mmap.h"
#include "llama-model.h"

#include <algorithm>
#include <map>
#include <cassert>
#include <stdexcept>

// vec

-struct ggml_tensor * llama_adapter_cvec::tensor_for(int il) const {
+ggml_tensor * llama_adapter_cvec::tensor_for(int il) const {
if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
return nullptr;
}

return tensors[il];
}

-struct ggml_tensor * llama_adapter_cvec::apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
+ggml_tensor * llama_adapter_cvec::apply_to(ggml_context * ctx, ggml_tensor * cur, int il) const {
ggml_tensor * layer_dir = tensor_for(il);
if (layer_dir != nullptr) {
cur = ggml_add(ctx, cur, layer_dir);
@@ -40,7 +39,7 @@ bool llama_adapter_cvec::init(const llama_model & model) {
auto ctx_for_buft = [&](ggml_backend_buffer_type_t buft) -> ggml_context * {
auto it = ctx_map.find(buft);
if (it == ctx_map.end()) {
-struct ggml_init_params params = {
+ggml_init_params params = {
/*.mem_size =*/ hparams.n_layer*ggml_tensor_overhead(),
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ true,
@@ -135,7 +134,7 @@ bool llama_adapter_cvec::apply(

// lora

-llama_adapter_lora_weight * llama_adapter_lora::get_weight(struct ggml_tensor * w) {
+llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
const std::string name(w->name);

const auto pos = ab_map.find(name);
@@ -146,11 +145,11 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(struct ggml_tensor *
return nullptr;
}

-static void llama_adapter_lora_init_impl(struct llama_model & model, const char * path_lora, struct llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);

ggml_context * ctx_init;
-struct gguf_init_params meta_gguf_params = {
+gguf_init_params meta_gguf_params = {
/* .no_alloc = */ true,
/* .ctx = */ &ctx_init,
};
@@ -201,7 +200,7 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
auto it = ctx_map.find(buft);
if (it == ctx_map.end()) {
// add a new context
-struct ggml_init_params params = {
+ggml_init_params params = {
/*.mem_size =*/ n_tensors*ggml_tensor_overhead(),
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ true,
@@ -264,7 +263,7 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
throw std::runtime_error("LoRA tensor '" + name + "' does not exist in base model (hint: maybe wrong base model?)");
}

-struct ggml_context * dev_ctx = ctx_for_buft(ggml_backend_buffer_get_type(model_tensor->buffer));
+ggml_context * dev_ctx = ctx_for_buft(ggml_backend_buffer_get_type(model_tensor->buffer));
// validate tensor shape
if (is_token_embd) {
// expect B to be non-transposed, A and B are flipped; see llm_build_inp_embd()
@@ -281,8 +280,8 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
}

// save tensor to adapter
-struct ggml_tensor * tensor_a = ggml_dup_tensor(dev_ctx, w.a);
-struct ggml_tensor * tensor_b = ggml_dup_tensor(dev_ctx, w.b);
+ggml_tensor * tensor_a = ggml_dup_tensor(dev_ctx, w.a);
+ggml_tensor * tensor_b = ggml_dup_tensor(dev_ctx, w.b);
ggml_set_name(tensor_a, w.a->name);
ggml_set_name(tensor_b, w.b->name);
adapter.ab_map[name] = llama_adapter_lora_weight(tensor_a, tensor_b);
@@ -308,7 +307,7 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
{
llama_file gguf_file(path_lora, "rb");
std::vector<uint8_t> read_buf;
-auto set_tensor = [&](struct ggml_tensor * orig, struct ggml_tensor * dev) {
+auto set_tensor = [&](ggml_tensor * orig, ggml_tensor * dev) {
size_t offs = gguf_get_data_offset(ctx_gguf.get()) + gguf_get_tensor_offset(ctx_gguf.get(), gguf_find_tensor(ctx_gguf.get(), orig->name));
size_t size = ggml_nbytes(orig);
read_buf.resize(size);
@@ -327,8 +326,8 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
}

-struct llama_adapter_lora * llama_adapter_lora_init(struct llama_model * model, const char * path_lora) {
-struct llama_adapter_lora * adapter = new llama_adapter_lora();
+llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
+llama_adapter_lora * adapter = new llama_adapter_lora();

try {
llama_adapter_lora_init_impl(*model, path_lora, *adapter);
@@ -342,6 +341,6 @@ struct llama_adapter_lora * llama_adapter_lora_init(struct llama_model * model,
return nullptr;
}

-void llama_adapter_lora_free(struct llama_adapter_lora * adapter) {
+void llama_adapter_lora_free(llama_adapter_lora * adapter) {
delete adapter;
}
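
A short usage sketch of the two public entry points above (not part of the diff; the model handle and adapter path are placeholders):

// Hypothetical usage; 'model' is an already loaded llama_model *.
llama_adapter_lora * adapter = llama_adapter_lora_init(model, "my-adapter.gguf");
if (adapter == nullptr) {
    // loading failed: the init function catches the exception, logs it and returns nullptr
} else {
    // ... attach the adapter to a llama_context and run inference ...
    llama_adapter_lora_free(adapter);
}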
src/llama-adapter.h (18 changes: 9 additions & 9 deletions)
@@ -15,9 +15,9 @@
//

struct llama_adapter_cvec {
-struct ggml_tensor * tensor_for(int il) const;
+ggml_tensor * tensor_for(int il) const;

-struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const;
+ggml_tensor * apply_to(ggml_context * ctx, ggml_tensor * cur, int il) const;

bool apply(
const llama_model & model,
@@ -36,16 +36,16 @@ struct llama_adapter_cvec {
std::vector<ggml_context_ptr> ctxs;
std::vector<ggml_backend_buffer_ptr> bufs;

-std::vector<struct ggml_tensor *> tensors; // per layer
+std::vector<ggml_tensor *> tensors; // per layer
};

//
// llama_adapter_lora
//

struct llama_adapter_lora_weight {
-struct ggml_tensor * a = nullptr;
-struct ggml_tensor * b = nullptr;
+ggml_tensor * a = nullptr;
+ggml_tensor * b = nullptr;

// get actual scale based on rank and alpha
float get_scale(float alpha, float adapter_scale) const {
@@ -55,12 +55,12 @@ struct llama_adapter_lora_weight {
}

llama_adapter_lora_weight() = default;
-llama_adapter_lora_weight(struct ggml_tensor * a, struct ggml_tensor * b) : a(a), b(b) {}
+llama_adapter_lora_weight(ggml_tensor * a, ggml_tensor * b) : a(a), b(b) {}
};

struct llama_adapter_lora {
// map tensor name to lora_a_b
-std::unordered_map<std::string, struct llama_adapter_lora_weight> ab_map;
+std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;

std::vector<ggml_context_ptr> ctxs;
std::vector<ggml_backend_buffer_ptr> bufs;
@@ -70,7 +70,7 @@ struct llama_adapter_lora {
llama_adapter_lora() = default;
~llama_adapter_lora() = default;

-llama_adapter_lora_weight * get_weight(struct ggml_tensor * w);
+llama_adapter_lora_weight * get_weight(ggml_tensor * w);
};

-using llama_adapter_loras = std::unordered_map<struct llama_adapter_lora *, float>;
+using llama_adapter_loras = std::unordered_map<llama_adapter_lora *, float>;
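
For reference, a lora_a_b pair of this shape is conventionally applied as y = W x + scale * B (A x), with the scale coming from get_scale(). A minimal ggml-style sketch of that formulation (an assumption about typical usage, not code from this commit; ctx, cur, w, alpha and adapter_scale are assumed to exist):

// Hypothetical sketch: apply one llama_adapter_lora_weight 'w' to the activation 'cur'.
const float scale = w.get_scale(alpha, adapter_scale);    // rank/alpha based scale
ggml_tensor * ax  = ggml_mul_mat(ctx, w.a, cur);          // down-projection to the LoRA rank
ggml_tensor * bax = ggml_mul_mat(ctx, w.b, ax);           // up-projection back to the model dim
cur = ggml_add(ctx, cur, ggml_scale(ctx, bax, scale));    // add the scaled low-rank delta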
