From b74603db59ee99712439a38da2550ab692eec52d Mon Sep 17 00:00:00 2001 From: kingbri Date: Wed, 20 Mar 2024 01:17:04 -0400 Subject: [PATCH] Model: Log metrics before yielding a stop Yielding the finish reason before the logging causes the function to terminate early. Instead, log before yielding and breaking out of the generation loop. Signed-off-by: kingbri --- backends/exllamav2/model.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 18eb626d..1e05d041 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -975,19 +975,19 @@ def generate_gen_sync(self, prompt: str, **kwargs): last_chunk_time = now if eos or generated_tokens == max_tokens: + # Print response + log_response(full_response) + + # Print metrics + elapsed_time = last_chunk_time - start_time + context_len = None if ids is None else context_len + + log_metrics( + generated_tokens, elapsed_time, context_len, self.config.max_seq_len + ) + finish_reason = "length" if generated_tokens == max_tokens else "stop" generation = {"finish_reason": finish_reason} yield generation break - - # Print response - log_response(full_response) - - # Print metrics - elapsed_time = last_chunk_time - start_time - context_len = None if ids is None else context_len - - log_metrics( - generated_tokens, elapsed_time, context_len, self.config.max_seq_len - )