
Commit: Randomization_factor
AAbushady committed Jan 24, 2024
1 parent 66477a4 commit 88089db
Showing 6 changed files with 43 additions and 3 deletions.
9 changes: 9 additions & 0 deletions .gitignore
@@ -74,6 +74,10 @@ models-mnt
arm_neon.h
compile_commands.json
CMakeSettings.json
/venv/
/cmake-build-debug/
/cmake-build-release/
/.idea/

__pycache__
dist
@@ -106,3 +110,8 @@ poetry.toml
/tests/test-rope
/tests/test-backend-ops
/tests/test-autorelease
/xaa
/xab
/xac

examples/
3 changes: 2 additions & 1 deletion common/sampling.cpp
@@ -131,6 +131,7 @@ static void sampler_queue(
const float temp = params.temp;
const float dynatemp_range = params.dynatemp_range;
const float dynatemp_exponent = params.dynatemp_exponent;
const float randomization_factor = params.randomization_factor;
const int32_t top_k = params.top_k <= 0 ? n_vocab : params.top_k;
const float top_p = params.top_p;
const float min_p = params.min_p;
@@ -144,7 +145,7 @@ static void sampler_queue(
case 'f': llama_sample_tail_free(ctx_main, &cur_p, tfs_z, min_keep); break;
case 'y': llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); break;
case 'p': llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); break;
case 'm': llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); break;
case 'm': llama_sample_min_p (ctx_main, &cur_p, min_p, randomization_factor, min_keep); break;
case 't':
if (dynatemp_range > 0) {
float dynatemp_min = std::max(0.0f, temp - dynatemp_range);
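As a side note (not part of this commit), the switch above is driven character by character by a sampler-order string; only the 'm' (min-p) stage now receives the new randomization_factor argument. A toy sketch of that dispatch follows, where the order "kfypmt" and the stage names are assumptions inferred from the cases shown:

#include <cstdio>
#include <string>

int main() {
    const std::string sequence = "kfypmt";  // assumed order: top-k, tail-free, typical, top-p, min-p, temperature
    for (const char s : sequence) {
        if (s == 'm') {
            std::printf("'m': llama_sample_min_p(..., min_p, randomization_factor, min_keep)\n");
        } else {
            std::printf("'%c': unchanged sampler stage\n", s);
        }
    }
    return 0;
}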
1 change: 1 addition & 0 deletions common/sampling.h
@@ -20,6 +20,7 @@ typedef struct llama_sampling_params {
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
float dynatemp_range = 0.00f; // 0.0 = disabled
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
float randomization_factor = 0.00f; // 0.0 = disabled
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
float penalty_repeat = 1.10f; // 1.0 = disabled
float penalty_freq = 0.00f; // 0.0 = disabled
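Not part of this commit, but for orientation, a minimal sketch of setting the new field from application code; it assumes common/sampling.h is on the include path, and the values are purely illustrative:

#include "common/sampling.h"   // path assumed; the struct is the one shown in the hunk above

int main() {
    llama_sampling_params sparams;               // defaults as declared above
    sparams.min_p                = 0.05f;        // illustrative min-p threshold
    sparams.randomization_factor = 0.30f;        // std-dev of the Gaussian logit noise; 0.00f leaves it disabled
    return 0;
}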
2 changes: 2 additions & 0 deletions examples/server/server.cpp
@@ -741,6 +741,7 @@ struct llama_server_context
slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
slot->sparams.randomization_factor = json_value(data, "randomization_factor", default_sparams.randomization_factor);
slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present);
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
@@ -1236,6 +1237,7 @@ struct llama_server_context
{"top_k", slot.sparams.top_k},
{"top_p", slot.sparams.top_p},
{"min_p", slot.sparams.min_p},
{"randomization_factor", slot.sparams.randomization_factor},
{"tfs_z", slot.sparams.tfs_z},
{"typical_p", slot.sparams.typical_p},
{"repeat_last_n", slot.sparams.penalty_last_n},
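A hypothetical client-side sketch (not part of this commit) of a request body that carries the new key, built with nlohmann::json, which the server example already depends on. The "prompt" field and all numeric values are assumptions; the other keys mirror the json_value(...) lookups above:

#include <nlohmann/json.hpp>
#include <cstdio>

int main() {
    nlohmann::json body = {
        {"prompt",               "Once upon a time"},   // assumed standard completion field
        {"min_p",                0.05},
        {"randomization_factor", 0.3},
        {"repeat_penalty",       1.1}
    };
    std::printf("%s\n", body.dump(2).c_str());          // this string would be POSTed to the completion endpoint
    return 0;
}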
30 changes: 28 additions & 2 deletions llama.cpp
@@ -7634,14 +7634,39 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can
}
}

void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep) {
void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, float randomization_factor, size_t min_keep) {
if (p <= 0.0f || !candidates->size) {
return;
}

const int64_t t_start_sample_us = ggml_time_us();

llama_sample_softmax(ctx, candidates);
// Check if randomization_factor is above 0 and apply Gaussian noise if so
if (randomization_factor > 0.0) {
// Create a random number generator
std::default_random_engine generator;
std::random_device rd;
generator.seed(rd());
// Create a Gaussian distribution with mean 0 and standard deviation randomization_factor
std::normal_distribution<float> distribution(0.0f, randomization_factor);

// Apply Gaussian noise to each logit
for (size_t i = 0; i < candidates->size; ++i) {
// Add Gaussian noise to the logit
candidates->data[i].logit += distribution(generator);
}

candidates->sorted = false;

// Re-normalize probabilities if necessary
llama_sample_softmax(ctx, candidates);
}

// Store original top probability
float original_top_prob = candidates->data[0].p;

float scale = candidates->data[0].p; // scale by max prob
size_t i = 1; // first token always matches
@@ -7654,6 +7679,7 @@ void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * can

// Resize the output vector to keep only the matching tokens
candidates->size = i;
llama_sample_softmax(ctx, candidates);

if (ctx) {
ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
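To illustrate the effect of the change above in isolation, here is a self-contained toy sketch (not library code) that perturbs a few logits with zero-mean Gaussian noise of standard deviation randomization_factor and then re-normalizes with a softmax, as the commit does before applying the min-p cutoff. All values are made up for demonstration:

#include <cstdio>
#include <random>
#include <vector>
#include <cmath>

int main() {
    std::vector<float> logits = {2.0f, 1.0f, 0.5f};     // toy logits
    const float randomization_factor = 0.3f;            // assumed noise level

    std::random_device rd;
    std::default_random_engine generator(rd());
    std::normal_distribution<float> distribution(0.0f, randomization_factor);

    // Jitter each logit, mirroring the loop in the commit above
    for (float & logit : logits) {
        logit += distribution(generator);
    }

    // Re-normalize with a softmax over the perturbed logits
    float sum = 0.0f;
    std::vector<float> probs(logits.size());
    for (size_t i = 0; i < logits.size(); ++i) {
        probs[i] = std::exp(logits[i]);
        sum += probs[i];
    }
    for (size_t i = 0; i < logits.size(); ++i) {
        std::printf("p[%zu] = %.3f\n", i, probs[i] / sum);
    }
    return 0;
}

Larger randomization_factor values make the post-noise probabilities vary more from run to run, while 0.0 skips the noise branch entirely.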
1 change: 1 addition & 0 deletions llama.h
@@ -758,6 +758,7 @@ extern "C" {
struct llama_context * ctx,
llama_token_data_array * candidates,
float p,
float randomization_factor,
size_t min_keep);

/// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
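A hypothetical call site for the extended declaration above (not part of this commit); ctx and candidates are assumed to be set up elsewhere, and the numeric values are illustrative:

#include "llama.h"

// Apply the extended min-p sampler to an existing candidate list.
static void apply_min_p_with_noise(llama_context * ctx, llama_token_data_array * candidates) {
    const float  p                    = 0.05f;  // illustrative min-p threshold
    const float  randomization_factor = 0.30f;  // illustrative noise std-dev; 0.0f restores the old behaviour
    const size_t min_keep             = 1;
    llama_sample_min_p(ctx, candidates, p, randomization_factor, min_keep);
}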
