Commit

feat: Update llama.cpp
abetlen committed Dec 19, 2024
1 parent 803924b commit 2bc1d97
Showing 3 changed files with 15 additions and 46 deletions.
5 changes: 0 additions & 5 deletions llama_cpp/_internals.py
@@ -805,15 +805,10 @@ def add_penalties(
         ignore_eos: bool,
     ):
         sampler = llama_cpp.llama_sampler_init_penalties(
-            n_vocab,
-            special_eos_id,
-            linefeed_id,
             penalty_last_n,
             penalty_repeat,
             penalty_freq,
             penalty_present,
-            penalize_nl,
-            ignore_eos,
         )
         self._add_sampler(sampler)

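For orientation, a minimal sketch of what a direct call to the rebound function looks like after this change; the numeric values are illustrative defaults, not part of the diff:

import llama_cpp

# Only the four penalty parameters remain in the new signature;
# the vocabulary/EOS/newline arguments are gone upstream.
sampler = llama_cpp.llama_sampler_init_penalties(
    64,   # penalty_last_n: last n tokens to penalize (-1 = context size)
    1.1,  # penalty_repeat: 1.0 = disabled
    0.0,  # penalty_freq: 0.0 = disabled
    0.0,  # penalty_present: 0.0 = disabled
)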
54 changes: 14 additions & 40 deletions llama_cpp/llama_cpp.py
@@ -256,13 +256,17 @@
 # // note: these values should be synchronized with ggml_rope
 # // TODO: maybe move this enum to ggml.h (ggml_rope_type)
 # enum llama_rope_type {
-#     LLAMA_ROPE_TYPE_NONE = -1,
-#     LLAMA_ROPE_TYPE_NORM = 0,
-#     LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
+#     LLAMA_ROPE_TYPE_NONE   = -1,
+#     LLAMA_ROPE_TYPE_NORM   = 0,
+#     LLAMA_ROPE_TYPE_NEOX   = GGML_ROPE_TYPE_NEOX,
+#     LLAMA_ROPE_TYPE_MROPE  = GGML_ROPE_TYPE_MROPE,
+#     LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION,
 # };
 LLAMA_ROPE_TYPE_NONE = -1
 LLAMA_ROPE_TYPE_NORM = 0
 LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX = 2
+LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE = 8
+LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION = 24
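The two new constants mirror the upstream ggml rope modes (M-RoPE and the vision variant used by multimodal models such as Qwen2-VL). A hedged sketch of branching on a loaded model's rope type; the model path is a placeholder, and llama_rope_type is assumed to be the existing accessor in these bindings:

import llama_cpp

llama_cpp.llama_backend_init()
# "model.gguf" is a placeholder path, not part of this commit.
model = llama_cpp.llama_load_model_from_file(
    b"model.gguf", llama_cpp.llama_model_default_params()
)

rope = llama_cpp.llama_rope_type(model)
if rope == llama_cpp.LLAMA_ROPE_TYPE_MROPE:
    print("model uses multimodal rotary embeddings (M-RoPE)")
elif rope == llama_cpp.LLAMA_ROPE_TYPE_VISION:
    print("model uses the vision RoPE variant")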


# enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
@@ -1265,6 +1269,7 @@ def llama_rope_freq_scale_train(model: llama_model_p, /) -> float:
 # // Functions to access the model's GGUF metadata scalar values
 # // - The functions return the length of the string on success, or -1 on failure
 # // - The output string is always null-terminated and cleared on failure
+# // - When retrieving a string, an extra byte must be allocated to account for the null terminator
 # // - GGUF array values are not supported by these functions
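The new extra-byte note is the practical sizing rule for callers; a minimal sketch, assuming `model` is a loaded llama_model_p handle (as in the rope-type sketch above) and using the existing llama_model_meta_val_str binding:

import ctypes
import llama_cpp

key = b"general.architecture"
# Size the buffer with room for the null terminator: the reported
# string length must be strictly less than the buffer size.
buf = ctypes.create_string_buffer(256)
n = llama_cpp.llama_model_meta_val_str(model, key, buf, ctypes.sizeof(buf))
if n < 0:
    print("metadata key not found")
else:
    print(buf.value.decode("utf-8"))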


@@ -1378,18 +1383,6 @@ def llama_model_n_params(model: llama_model_p, /) -> int:
     ...


-# // Get a llama model tensor
-# LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);
-@ctypes_function(
-    "llama_get_model_tensor", [llama_model_p_ctypes, ctypes.c_char_p], ctypes.c_void_p
-)
-def llama_get_model_tensor(
-    model: llama_model_p, name: Union[ctypes.c_char_p, bytes], /
-) -> ctypes.c_void_p:
-    """Get a llama model tensor"""
-    ...
-
-
 # // Returns true if the model contains an encoder that requires llama_encode() call
 # LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
 @ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool)
@@ -3336,41 +3329,22 @@ def llama_sampler_init_grammar(
     ...


 # /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
 # LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
-#            int32_t n_vocab,         // llama_n_vocab()
-#        llama_token special_eos_id,  // llama_token_eos()
-#        llama_token linefeed_id,     // llama_token_nl()
-#            int32_t penalty_last_n,  // last n tokens to penalize (0 = disable penalty, -1 = context size)
-#              float penalty_repeat,  // 1.0 = disabled
-#              float penalty_freq,    // 0.0 = disabled
-#              float penalty_present, // 0.0 = disabled
-#               bool penalize_nl,     // consider newlines as a repeatable token
-#               bool ignore_eos);     // ignore the end-of-sequence token
+#            int32_t penalty_last_n,  // last n tokens to penalize (0 = disable penalty, -1 = context size)
+#              float penalty_repeat,  // 1.0 = disabled
+#              float penalty_freq,    // 0.0 = disabled
+#              float penalty_present); // 0.0 = disabled
 @ctypes_function(
     "llama_sampler_init_penalties",
-    [
-        ctypes.c_int32,
-        llama_token,
-        llama_token,
-        ctypes.c_int32,
-        ctypes.c_float,
-        ctypes.c_float,
-        ctypes.c_float,
-        ctypes.c_bool,
-        ctypes.c_bool,
-    ],
+    [ctypes.c_int32, ctypes.c_float, ctypes.c_float, ctypes.c_float],
     llama_sampler_p_ctypes,
 )
 def llama_sampler_init_penalties(
-    n_vocab: int,
-    special_eos_id: int,
-    linefeed_id: int,
     penalty_last_n: int,
     penalty_repeat: float,
     penalty_freq: float,
     penalty_present: float,
-    penalize_nl: bool,
-    ignore_eos: bool,
     /,
 ) -> llama_sampler_p:
     ...
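As a usage note, the pared-down sampler slots into a sampler chain the same way as before; a sketch using the chain helpers assumed to be present in these bindings, with illustrative penalty values:

import llama_cpp

# Build a default sampler chain and attach the new 4-argument
# penalties sampler (last_n=64, repeat=1.1, freq/present disabled).
chain = llama_cpp.llama_sampler_chain_init(
    llama_cpp.llama_sampler_chain_default_params()
)
llama_cpp.llama_sampler_chain_add(
    chain, llama_cpp.llama_sampler_init_penalties(64, 1.1, 0.0, 0.0)
)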
2 changes: 1 addition & 1 deletion vendor/llama.cpp
Submodule llama.cpp updated 160 files
