Skip to content

Commit

Permalink
llama : impl
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov committed Dec 23, 2024
1 parent b0d6b66 commit a7df071
Show file tree
Hide file tree
Showing 16 changed files with 230 additions and 209 deletions.
10 changes: 7 additions & 3 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,10 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
// Split utils
//

static const char * const LLM_KV_SPLIT_NO = "split.no";
static const char * const LLM_KV_SPLIT_COUNT = "split.count";
static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
// String constants used by the split utilities declared in this header.
// NOTE(review): the LLM_KV_ prefix suggests these are metadata key names for split model
// files - confirm against the code that reads/writes them.
// Being const objects inside an unnamed namespace, they have internal linkage: every
// translation unit that includes this header gets its own private copy.
namespace {

const char * const LLM_KV_SPLIT_NO = "split.no";
const char * const LLM_KV_SPLIT_COUNT = "split.count";
const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";

}
7 changes: 3 additions & 4 deletions examples/gguf-split/gguf-split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
#include "common.h"

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <string>
#include <vector>

#include <stdio.h>
#include <string.h>
#include <climits>

#include <cstdio>
#include <cstring>
#include <stdexcept>

#if defined(_WIN32)
Expand Down
16 changes: 7 additions & 9 deletions examples/quantize-stats/quantize-stats.cpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
#include "common.h"
#include "ggml.h"
#include "llama.h"
#include "llama-impl.h"
#include "llama-context.h"
#include "common.h"

#include <algorithm>
#include <cassert>
#include <cinttypes>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <map>
#include <numeric>
#include <regex>
#include <string>
#include <unordered_map>
#include <vector>
#include <thread>
#include <mutex>
Expand Down Expand Up @@ -330,13 +328,13 @@ int main(int argc, char ** argv) {
}
}

const auto &tensors = llama_internal_get_tensor_map(ctx);
const auto & tensors = llama_internal_get_tensor_map(ctx);

// check layer tensors
int included_layers = 0;
int64_t max_nelements = 0;
bool is_f16 = false;
for (const auto& kv_tensor : tensors) {
for (const auto & kv_tensor : tensors) {
if (!layer_included(params, kv_tensor.first)) {
continue;
}
Expand Down Expand Up @@ -371,8 +369,8 @@ int main(int argc, char ** argv) {
if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {
continue;
}
const auto * qfns = ggml_get_type_traits(type);
const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
const auto * qfns = ggml_get_type_traits(type);
const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
if (qfns_cpu->from_float && qfns->to_float) {
if (params.verbose) {
printf("testing %s ...\n", ggml_type_name(type));
Expand All @@ -382,7 +380,7 @@ int main(int argc, char ** argv) {

error_stats global_stats {};

for (const auto& kv_tensor : tensors) {
for (const auto & kv_tensor : tensors) {
if (!layer_included(params, kv_tensor.first)) {
continue;
}
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ add_library(llama
llama-chat.cpp
llama-context.cpp
llama-hparams.cpp
llama-impl.cpp
llama-grammar.cpp
llama-kv-cache.cpp
llama-mmap.cpp
Expand Down
1 change: 1 addition & 0 deletions src/llama-adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <algorithm>
#include <map>
#include <cassert>
#include <stdexcept>

// vec

Expand Down
6 changes: 4 additions & 2 deletions src/llama-batch.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ struct llama_ubatch {

// Describes one sequence entry tracked by llama_sbatch.
// NOTE(review): field meanings below are partly inferred - confirm against llama_sbatch.
struct llama_sbatch_seq {
// presumably the number of entries reachable through seq_id - TODO confirm
int32_t n_seq_id;

// non-owning as far as this struct shows (no destructor here); ownership lives elsewhere - verify
llama_seq_id * seq_id;

// start position of this entry; llama_sbatch indexes its ids array with `seq.offset + i`
size_t offset;
// presumably the number of tokens covered starting at offset - TODO confirm
size_t length;
};
Expand Down Expand Up @@ -112,8 +114,8 @@ struct llama_sbatch {
if (ubatch.equal_seqs) {
for (size_t i = 0; i < length; ++i) {
memcpy(
ubatch.embd + n_embd * (ubatch.n_tokens + i),
batch->embd + n_embd * ids[seq.offset + i],
ubatch.embd + (n_embd * (ubatch.n_tokens + i)),
batch->embd + (n_embd * ids[seq.offset + i]),
n_embd * sizeof(float)
);
}
Expand Down
7 changes: 7 additions & 0 deletions src/llama-context.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "llama-context.h"

#include <stdexcept>

// deprecated
size_t llama_get_state_size(struct llama_context * ctx) {
return llama_state_get_size(ctx);
Expand Down Expand Up @@ -968,3 +970,8 @@ size_t llama_state_seq_load_file(struct llama_context * ctx, const char * filepa
}
}

// Exposes the context's model tensor list (name, tensor) by const reference.
// No copy is made; the returned reference points into `ctx`, which must be non-null.
const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(struct llama_context * ctx) {
    const auto & model = ctx->model;
    return model.tensors_by_name;
}
4 changes: 4 additions & 0 deletions src/llama-context.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,7 @@ static void llama_output_reorder(struct llama_context * ctx) {
out_ids.clear();
}
}

// For internal test use
// TODO: remove
const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(struct llama_context * ctx);
1 change: 1 addition & 0 deletions src/llama-grammar.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "llama-grammar.h"

#include "llama-impl.h"
#include "llama-vocab.h"
#include "llama-sampling.h"

Expand Down
4 changes: 3 additions & 1 deletion src/llama-grammar.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#pragma once

#include "llama-impl.h"
#include "llama.h"

#include <map>
#include <string>
#include <vector>

struct llama_vocab;

Expand Down
74 changes: 74 additions & 0 deletions src/llama-impl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#include "llama-impl.h"

#include "llama.h"

#include <cstdarg>
#include <cstdio>
#include <vector>

// Logging configuration: the active callback plus the opaque pointer handed back to it.
struct llama_logger_state {
    // receives every formatted message; starts out as the default callback
    ggml_log_callback log_callback { llama_log_callback_default };
    // passed through unchanged as the last argument of log_callback
    void * log_callback_user_data { nullptr };
};

// the single logger configuration, private to this translation unit
static llama_logger_state g_logger_state;

// Starts a scoped time measurement that is added to `t_acc` on destruction.
// When `disable` is true the start time is set to the sentinel -1, which the destructor
// treats as "do not accumulate"; ggml_time_us() is not called in that case.
time_meas::time_meas(int64_t & t_acc, bool disable)
    : t_start_us(disable ? -1 : ggml_time_us()),
      t_acc(t_acc) {
}

// Adds the time elapsed since construction to the accumulator, unless the measurement
// was disabled (negative start time).
time_meas::~time_meas() {
    if (t_start_us < 0) {
        return; // disabled at construction - nothing to record
    }

    const auto elapsed_us = ggml_time_us() - t_start_us;
    t_acc += elapsed_us;
}

// Replaces every non-overlapping occurrence of `search` in `s` with `replace`, in place.
// Matches are found left to right and the scan resumes after each match, so text inserted
// by a replacement is never re-scanned. An empty `search` leaves `s` untouched.
void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    const size_t search_len = search.length();
    if (search_len == 0) {
        return;
    }

    std::string result;
    result.reserve(s.length());

    // `cursor` marks the first character of `s` that has not been copied to `result` yet
    size_t cursor = 0;
    for (size_t hit = s.find(search, cursor); hit != std::string::npos; hit = s.find(search, cursor)) {
        result.append(s, cursor, hit - cursor);
        result += replace;
        cursor = hit + search_len;
    }

    // copy whatever follows the last match (the whole string if there was none)
    result.append(s, cursor, std::string::npos);

    s = std::move(result);
}

// Installs the log callback used by this file's logging helpers.
// The arguments are forwarded unchanged to ggml_log_set(). For the local logger state,
// a null `log_callback` selects llama_log_callback_default; `user_data` is stored as given.
void llama_log_set(ggml_log_callback log_callback, void * user_data) {
    ggml_log_set(log_callback, user_data);

    if (log_callback) {
        g_logger_state.log_callback = log_callback;
    } else {
        g_logger_state.log_callback = llama_log_callback_default;
    }
    g_logger_state.log_callback_user_data = user_data;
}

// Formats a printf-style message and hands it to the registered log callback.
//
// The message is first rendered into a small stack buffer. If it does not fit, the length
// reported by vsnprintf is used to render it again into a heap buffer of exactly the right
// size. The first vsnprintf call consumes `args`, so a copy is taken up front for the
// second pass.
//
// If vsnprintf reports a formatting error (negative return value) the stack buffer contents
// are unspecified and must not reach the callback; the raw, unexpanded format string is
// forwarded instead so the message is not lost silently.
static void llama_log_internal_v(ggml_log_level level, const char * format, va_list args) {
    char stack_buf[128];

    va_list args_copy;
    va_copy(args_copy, args);

    const int len = vsnprintf(stack_buf, sizeof(stack_buf), format, args);

    if (len < 0) {
        // formatting failed - best effort: emit the format string verbatim
        g_logger_state.log_callback(level, format, g_logger_state.log_callback_user_data);
    } else if (static_cast<size_t>(len) < sizeof(stack_buf)) {
        g_logger_state.log_callback(level, stack_buf, g_logger_state.log_callback_user_data);
    } else {
        // size computed in size_t so that len == INT_MAX cannot overflow;
        // the vector is zero-filled, so the text stays terminated even if the second pass fails
        std::vector<char> heap_buf(static_cast<size_t>(len) + 1);
        vsnprintf(heap_buf.data(), heap_buf.size(), format, args_copy);
        g_logger_state.log_callback(level, heap_buf.data(), g_logger_state.log_callback_user_data);
    }

    va_end(args_copy);
}

// Variadic front end for logging: packs the trailing arguments into a va_list and
// delegates formatting and dispatch to llama_log_internal_v().
void llama_log_internal(ggml_log_level level, const char * format, ...) {
    va_list varargs;
    va_start(varargs, format);

    llama_log_internal_v(level, format, varargs);

    va_end(varargs);
}

// Default log sink: writes `text` to stderr as-is and flushes immediately.
// `level` and `user_data` are accepted to match the callback signature but are not used.
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    (void) level;

    FILE * const sink = stderr;
    fputs(text, sink);
    fflush(sink);
}
Loading

0 comments on commit a7df071

Please sign in to comment.