common : remove duplicate function llama_should_add_bos_token (#8778)
kylo5aby authored Aug 15, 2024
1 parent 6bda7ce commit 4af8420
Showing 14 changed files with 26 additions and 40 deletions.
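
The change in one place: `llama_add_bos_token` and `llama_add_eos_token` previously returned an `int32_t` (-1 for unknown, 0 for false, 1 for true), so the common layer carried a wrapper, `llama_should_add_bos_token`, to resolve the -1 case with a vocab-type heuristic. Both core functions now return `bool` directly, the wrapper is deleted, and every call site uses the core API. A minimal before/after sketch of the caller-side migration, assuming `model` is a loaded `llama_model *` as in the call sites below:

    // Before (old API): int32_t with three states; common wrapped it.
    //   const bool add_bos = llama_should_add_bos_token(model); // resolved -1 via SPM heuristic
    //   GGML_ASSERT(llama_add_eos_token(model) != 1);           // also passed on -1 "unknown"

    // After (this commit): bool, no unknown state, no wrapper needed.
    const bool add_bos = llama_add_bos_token(model);
    GGML_ASSERT(!llama_add_eos_token(model));
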
6 changes: 0 additions & 6 deletions common/common.cpp
@@ -2702,12 +2702,6 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
     return text;
 }
 
-bool llama_should_add_bos_token(const llama_model * model) {
-    const int add_bos = llama_add_bos_token(model);
-
-    return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-}
-
 //
 // Chat template utils
 //
4 changes: 0 additions & 4 deletions common/common.h
@@ -380,10 +380,6 @@ std::string llama_detokenize(
         const std::vector<llama_token> & tokens,
         bool special = true);
 
-// Uses the value from the model metadata if possible, otherwise
-// defaults to true when model type is SPM, otherwise false.
-bool llama_should_add_bos_token(const llama_model * model);
-
 //
 // Chat template utils
 //
2 changes: 1 addition & 1 deletion examples/cvector-generator/cvector-generator.cpp
@@ -271,7 +271,7 @@ struct tokenized_prompt {
     size_t max_seq_len;
 
     tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
-        const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+        const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
         tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
         tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
         max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
2 changes: 1 addition & 1 deletion examples/eval-callback/eval-callback.cpp
@@ -127,7 +127,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
 }
 
 static bool run(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
 
     std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);
 
4 changes: 2 additions & 2 deletions examples/imatrix/imatrix.cpp
@@ -433,8 +433,8 @@ static void process_logits(
 }
 
 static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
     const int n_ctx = llama_n_ctx(ctx);
 
     auto tim1 = std::chrono::high_resolution_clock::now();
4 changes: 2 additions & 2 deletions examples/infill/infill.cpp
@@ -203,8 +203,8 @@ int main(int argc, char ** argv) {
         LOG_TEE("\n");
         LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
     }
-    const bool add_bos = llama_should_add_bos_token(model);
-    GGML_ASSERT(llama_add_eos_token(model) != 1);
+    const bool add_bos = llama_add_bos_token(model);
+    GGML_ASSERT(!llama_add_eos_token(model));
     LOG("add_bos: %d\n", add_bos);
 
     std::vector<llama_token> embd_inp;
4 changes: 2 additions & 2 deletions examples/main/main.cpp
@@ -267,9 +267,9 @@ int main(int argc, char ** argv) {
         }
     }
 
-    const bool add_bos = llama_should_add_bos_token(model);
+    const bool add_bos = llama_add_bos_token(model);
     if (!llama_model_has_encoder(model)) {
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        GGML_ASSERT(!llama_add_eos_token(model));
     }
     LOG("add_bos: %d\n", add_bos);
 
12 changes: 6 additions & 6 deletions examples/perplexity/perplexity.cpp
@@ -340,8 +340,8 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
     // Output: `perplexity: 13.5106 [114/114]`
     // BOS tokens will be added for each chunk before eval
 
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
 
     fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
 
@@ -480,8 +480,8 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
     // Output: `perplexity: 13.5106 [114/114]`
     // BOS tokens will be added for each chunk before eval
 
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
 
     std::ofstream logits_stream;
     if (!params.logits_file.empty()) {
@@ -1733,8 +1733,8 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
     const int n_batch = params.n_batch;
     const int num_batches = (n_ctx + n_batch - 1)/n_batch;
     const int nv = 2*((n_vocab + 1)/2) + 4;
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
 
     std::vector<uint16_t> log_probs_uint16(size_t(n_ctx - 1 - n_ctx/2) * nv);
     std::vector<float> kld_values(size_t(n_ctx - 1 - n_ctx/2)*n_chunk);
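
Side note on the asserts in the three hunks above: perplexity, KL-divergence, and imatrix all tokenize long text in chunks and, per the in-code comment, add BOS per chunk themselves, so they require a tokenizer that does not force-append EOS. The old check `!= 1` also passed when the metadata was unknown (-1); the new `GGML_ASSERT(!llama_add_eos_token(...))` states the same requirement without the ambiguous third state.
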
7 changes: 3 additions & 4 deletions examples/server/server.cpp
@@ -693,9 +693,8 @@ struct server_context {
 
         n_ctx = llama_n_ctx(ctx);
 
-        add_bos_token = llama_should_add_bos_token(model);
-        has_eos_token = llama_add_eos_token(model) != 1;
-
+        add_bos_token = llama_add_bos_token(model);
+        has_eos_token = !llama_add_eos_token(model);
         return true;
     }
 
@@ -2038,7 +2037,7 @@ struct server_context {
         slot.t_start_generation = 0;
 
         if (slot.infill) {
-            const bool add_bos = llama_should_add_bos_token(model);
+            const bool add_bos = llama_add_bos_token(model);
             bool suff_rm_leading_spc = true;
             if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
                 params.input_suffix.erase(0, 1);
2 changes: 1 addition & 1 deletion examples/tokenize/tokenize.cpp
@@ -362,7 +362,7 @@ int main(int raw_argc, char ** raw_argv) {
         prompt = stdin_buffer.str();
     }
 
-    const bool model_wants_add_bos = llama_should_add_bos_token(model);
+    const bool model_wants_add_bos = llama_add_bos_token(model);
     const bool add_bos = model_wants_add_bos && !no_bos;
     const bool parse_special = !no_parse_special;
 
7 changes: 2 additions & 5 deletions include/llama.h
@@ -914,11 +914,8 @@ extern "C" {
     LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
     LLAMA_API llama_token llama_token_pad(const struct llama_model * model); // padding
 
-    // Returns -1 if unknown, 1 for true or 0 for false.
-    LLAMA_API int32_t llama_add_bos_token(const struct llama_model * model);
-
-    // Returns -1 if unknown, 1 for true or 0 for false.
-    LLAMA_API int32_t llama_add_eos_token(const struct llama_model * model);
+    LLAMA_API bool llama_add_bos_token(const struct llama_model * model);
+    LLAMA_API bool llama_add_eos_token(const struct llama_model * model);
 
     // Codellama infill tokens
     LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
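
Note that this is a breaking change to the public C API: the return type of both functions changes from `int32_t` to `bool` and the documented -1 "unknown" state disappears, so the vocab metadata must be resolved to a definite answer before it reaches the header. Downstream bindings that declare the old `int32_t` signatures will need updating.
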
4 changes: 2 additions & 2 deletions src/llama-vocab.cpp
@@ -1468,11 +1468,11 @@ llama_token llama_token_pad_impl(const struct llama_vocab & vocab) {
     return vocab.special_pad_id;
 }
 
-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab) {
     return vocab.tokenizer_add_bos;
 }
 
-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab) {
     return vocab.tokenizer_add_eos;
 }
 
4 changes: 2 additions & 2 deletions src/llama-vocab.h
@@ -95,8 +95,8 @@ llama_token llama_token_sep_impl(const struct llama_vocab & vocab);
 llama_token llama_token_nl_impl (const struct llama_vocab & vocab);
 llama_token llama_token_pad_impl(const struct llama_vocab & vocab);
 
-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab);
-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab);
 
 llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
 llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
4 changes: 2 additions & 2 deletions src/llama.cpp
@@ -18705,11 +18705,11 @@ llama_token llama_token_pad(const struct llama_model * model) {
     return llama_token_pad_impl(model->vocab);
 }
 
-int32_t llama_add_bos_token(const struct llama_model * model) {
+bool llama_add_bos_token(const struct llama_model * model) {
     return llama_add_bos_token_impl(model->vocab);
 }
 
-int32_t llama_add_eos_token(const struct llama_model * model) {
+bool llama_add_eos_token(const struct llama_model * model) {
     return llama_add_eos_token_impl(model->vocab);
 }
 
